From ad36b25793ff5768920f33c148f90457cc5cacfa Mon Sep 17 00:00:00 2001 From: Peter Weber Date: Thu, 31 Oct 2024 10:53:28 +0100 Subject: [PATCH] places, concepts: GND closeMatch corrections Co-Authored-by: Peter Weber --- poetry.lock | 23 ++++--- .../alembic/d8536341fc5e_delete_identifier.py | 4 +- rero_mef/marctojson/do_gnd_concepts.py | 33 ++++++---- rero_mef/marctojson/do_gnd_places.py | 25 ++++++-- .../concepts/examples/xml_minimal_record.xml | 61 ++++++++++++++----- .../test_concepts_gnd_transformation.py | 23 ++++--- .../places/test_places_gnd_transformation.py | 10 +-- 7 files changed, 117 insertions(+), 62 deletions(-) diff --git a/poetry.lock b/poetry.lock index dd1a8c6d..f228d346 100644 --- a/poetry.lock +++ b/poetry.lock @@ -229,21 +229,20 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "bleach" -version = "6.1.0" +version = "6.2.0" description = "An easy safelist-based HTML-sanitizing tool." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "bleach-6.1.0-py3-none-any.whl", hash = "sha256:3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6"}, - {file = "bleach-6.1.0.tar.gz", hash = "sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe"}, + {file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"}, + {file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"}, ] [package.dependencies] -six = ">=1.9.0" webencodings = "*" [package.extras] -css = ["tinycss2 (>=1.1.0,<1.3)"] +css = ["tinycss2 (>=1.1.0,<1.5)"] [[package]] name = "blinker" @@ -4238,23 +4237,23 @@ tornado = ["tornado (>=5)"] [[package]] name = "setuptools" -version = "75.2.0" +version = "75.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"}, - {file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"}, + {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, + {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] [[package]] name = "sickle" diff --git a/rero_mef/alembic/d8536341fc5e_delete_identifier.py b/rero_mef/alembic/d8536341fc5e_delete_identifier.py index f0952382..6aafb378 100644 --- a/rero_mef/alembic/d8536341fc5e_delete_identifier.py +++ b/rero_mef/alembic/d8536341fc5e_delete_identifier.py @@ -64,7 +64,7 @@ def upgrade(): ids.append(id_) rec = agent_cls.get_record(id_) rec.pop("identifier", None) - rec.update(data=rec, bcommit=False, reindex=True) + rec.update(data=rec, dbcommit=False, reindex=True) if idx % 1000 == 0: print(f" {idx} commit", end=" | ", flush=True) db.session.commit() @@ -102,7 +102,7 @@ def downgrade(): ids.append(id_) rec = agent_cls.get_record(id_) rec["identifier"] = f'"{url}{rec.pid}"' - rec.update(data=rec, bcommit=False, reindex=True) + rec.update(data=rec, dbcommit=False, reindex=True) if idx % 1000 == 0: print(f" {idx} commit", end=" | ", flush=True) db.session.commit() diff --git a/rero_mef/marctojson/do_gnd_concepts.py b/rero_mef/marctojson/do_gnd_concepts.py index cfb7d355..d53e17b9 100644 --- a/rero_mef/marctojson/do_gnd_concepts.py +++ b/rero_mef/marctojson/do_gnd_concepts.py @@ -215,14 +215,8 @@ def trans_gnd_relation(self): if value: self.json_dict[relation] = value - def trans_gnd_classification(self): - """Transformation classification from field 686.""" - if self.logger and self.verbose: - self.logger.info("Call Function", "trans_gnd_classification") - # TODO: find classification - def trans_gnd_match(self): - """Transformation closeMatch and exactfrom field 750.""" + """Transformation closeMatch and exactMatch from field 750.""" if self.logger and self.verbose: self.logger.info("Call Function", "trans_gnd_match") for field_750 in self.marc.get_fields("750"): @@ -253,29 +247,42 @@ def trans_gnd_match(self): if authorized_ap := build_string_from_field( field=field_750, subfields=subfields, tag_grouping=tag_grouping ): - match = { + match_data = { "authorized_access_point": authorized_ap, "source": "GND", } + identified_by = [] + other_source = None for subfield_0 in field_750.get_subfields("0"): if subfield_0.startswith("http"): - match.setdefault("identifiedBy", []).append( + identified_by.insert( + 0, { "type": "uri", "value": subfield_0, - } + }, ) + if other_source: + identified_by[0]["source"] = other_source else: source, id_ = get_source_and_id(subfield_0) if source: - match.setdefault("identifiedBy", []).append( + insert_pos = -1 + if source != "GND": + other_source = source + match_data["source"] = other_source + insert_pos = 0 + identified_by.insert( + insert_pos, { "source": source, "type": "bf:Nbn", "value": id_, - } + }, ) - self.json_dict.setdefault(match_type, []).append(match) + if identified_by: + match_data["identifiedBy"] = identified_by + self.json_dict.setdefault(match_type, []).append(match_data) def trans_gnd_note(self): """Transformation notes from field. diff --git a/rero_mef/marctojson/do_gnd_places.py b/rero_mef/marctojson/do_gnd_places.py index 23e81f46..c84cc316 100644 --- a/rero_mef/marctojson/do_gnd_places.py +++ b/rero_mef/marctojson/do_gnd_places.py @@ -270,29 +270,42 @@ def trans_gnd_match(self): if authorized_ap := build_string_from_field( field=field_751, subfields=subfields, tag_grouping=tag_grouping ): - match = { + match_data = { "authorized_access_point": authorized_ap, "source": "GND", } + identified_by = [] + other_source = None for subfield_0 in field_751.get_subfields("0"): if subfield_0.startswith("http"): - match.setdefault("identifiedBy", []).append( + identified_by.insert( + 0, { "type": "uri", "value": subfield_0, - } + }, ) + if other_source: + identified_by[0]["source"] = other_source else: source, id_ = get_source_and_id(subfield_0) if source: - match.setdefault("identifiedBy", []).append( + insert_pos = -1 + if source != "GND": + other_source = source + match_data["source"] = other_source + insert_pos = 0 + identified_by.insert( + insert_pos, { "source": source, "type": "bf:Nbn", "value": id_, - } + }, ) - self.json_dict.setdefault(match_type, []).append(match) + if identified_by: + match_data["identifiedBy"] = identified_by + self.json_dict.setdefault(match_type, []).append(match_data) def trans_gnd_note(self): """Transformation notes from field. diff --git a/tests/unit/concepts/examples/xml_minimal_record.xml b/tests/unit/concepts/examples/xml_minimal_record.xml index 32257621..a24d3104 100644 --- a/tests/unit/concepts/examples/xml_minimal_record.xml +++ b/tests/unit/concepts/examples/xml_minimal_record.xml @@ -1,24 +1,55 @@ 00589nx a2200193 45 - - 027630501 - sudoc + + + Grand Larousse universel (art. : Livre) + - - frBN001940328 + + + Laval RVM (en ligne), 2004-11-23 + - - frBN000000089 + + + Mers profondément engagées dans la masse des continents + - - FRBNF118620892 - FRBNF11862089 + + Note interne - - http://viaf.org/viaf/124265140 - VIAF - VIAF - 20200302 + + + Voir le descripteur Opposition (science politique) + + + + + Combiner un des descripteurs Mouvements contestataires + + + + + Voir les vedettes : Mouvements contestataires ; Opposition + + + + + Voir les vedettes du type : Antifascisme ; Mouvements + + + + + Voir aux mouvements d'opposition particuliers, par ex. : Combat + + + + + Voir aussi aux mers et océans particuliers + + + + VF3, NC3, NC30 \ No newline at end of file diff --git a/tests/unit/concepts/test_concepts_gnd_transformation.py b/tests/unit/concepts/test_concepts_gnd_transformation.py index b7040ced..6dbf5d0b 100644 --- a/tests/unit/concepts/test_concepts_gnd_transformation.py +++ b/tests/unit/concepts/test_concepts_gnd_transformation.py @@ -174,12 +174,12 @@ def test_gnd_close_match(): "closeMatch": [ { "authorized_access_point": "Atlases", - "source": "GND", + "source": "DLC", "identifiedBy": [ { - "source": "GND", - "type": "bf:Nbn", - "value": "(DE-101)1134384173", + "source": "DLC", + "type": "uri", + "value": "http://id.loc.gov/authorities/subjects/sh85009231", }, { "source": "DLC", @@ -187,8 +187,9 @@ def test_gnd_close_match(): "value": "sh85009231", }, { - "type": "uri", - "value": "http://id.loc.gov/authorities/subjects/sh85009231", + "source": "GND", + "type": "bf:Nbn", + "value": "(DE-101)1134384173", }, ], } @@ -196,11 +197,15 @@ def test_gnd_close_match(): "exactMatch": [ { "authorized_access_point": "Atlas", - "source": "GND", + "source": "DNLM", "identifiedBy": [ - {"source": "GND", "type": "bf:Nbn", "value": "(DE-101)125348144X"}, + { + "source": "DNLM", + "type": "uri", + "value": "http://id.nlm.nih.gov/mesh/D020466", + }, {"source": "DNLM", "type": "bf:Nbn", "value": "D020466"}, - {"type": "uri", "value": "http://id.nlm.nih.gov/mesh/D020466"}, + {"source": "GND", "type": "bf:Nbn", "value": "(DE-101)125348144X"}, ], }, ], diff --git a/tests/unit/places/test_places_gnd_transformation.py b/tests/unit/places/test_places_gnd_transformation.py index c3144e81..e2af23e4 100644 --- a/tests/unit/places/test_places_gnd_transformation.py +++ b/tests/unit/places/test_places_gnd_transformation.py @@ -153,17 +153,17 @@ def test_gnd_close_match(): "exactMatch": [ { "authorized_access_point": "Venedig", - "source": "GND", + "source": "ZBW", "identifiedBy": [ { - "source": "GND", + "source": "ZBW", "type": "bf:Nbn", - "value": "(DE-101)997977663", + "value": "091419204", }, { - "source": "ZBW", + "source": "GND", "type": "bf:Nbn", - "value": "091419204", + "value": "(DE-101)997977663", }, ], }