Skip to content

Commit

Permalink
agents concepts places: add other identifiers
Browse files Browse the repository at this point in the history
* Adds `identifiedBy` entries for IDREF agents, concepts and places, built from `035` fields that carry a `$2` (source) subfield.
* Updates dependencies.

Co-Authored-by: Peter Weber <[email protected]>
  • Loading branch information
rerowep committed Jul 15, 2024
1 parent 0d16ac4 commit 1f7214e
Show file tree
Hide file tree
Showing 20 changed files with 459 additions and 359 deletions.
558 changes: 283 additions & 275 deletions poetry.lock

Large diffs are not rendered by default.

29 changes: 28 additions & 1 deletion rero_mef/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
#: Template for error pages.
THEME_ERROR_TEMPLATE = "rero_mef/page_error.html"

WEBPACKEXT_PROJECT = 'rero_mef.theme.webpack:project'
WEBPACKEXT_PROJECT = "rero_mef.theme.webpack:project"

# Email configuration
# ===================
Expand Down Expand Up @@ -532,44 +532,54 @@
source=dict(terms=dict(field="sources", size=30)),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
deleted_entities=dict(filter=dict(exists=dict(field="*.deleted"))),
identifiedBy_source=dict(
terms=dict(field="*.identifiedBy.source", size=30)
),
),
filters=dict(
type=terms_filter("type"),
source=terms_filter("sources"),
deleted=exists_filter("deleted"),
deleted_entities=exists_filter("*.deleted"),
rero_double=terms_filter("rero.pid"),
identifiedBy_source=terms_filter("*.identifiedBy.source"),
),
),
viaf=dict(aggs=AgentViafRecord.aggregations(), filters=AgentViafRecord.filters()),
agents_gnd=dict(
aggs=dict(
type=dict(terms=dict(field="type", size=30)),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
identifiedBy_source=dict(terms=dict(field="identifiedBy.source", size=30)),
),
filters=dict(
type=terms_filter("type"),
deleted=exists_filter("deleted"),
identifiedBy_source=terms_filter("identifiedBy.source"),
),
),
agents_idref=dict(
aggs=dict(
type=dict(terms=dict(field="type", size=30)),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
identifiedBy_source=dict(terms=dict(field="identifiedBy.source", size=30)),
),
filters=dict(
type=terms_filter("type"),
deleted=exists_filter("deleted"),
identifiedBy_source=terms_filter("identifiedBy.source"),
),
),
agents_rero=dict(
aggs=dict(
type=dict(terms=dict(field="type", size=30)),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
identifiedBy_source=dict(terms=dict(field="identifiedBy.source", size=30)),
),
filters=dict(
type=terms_filter("type"),
deleted=exists_filter("deleted"),
identifiedBy_source=terms_filter("identifiedBy.source"),
),
),
concepts_mef=dict(
Expand All @@ -578,13 +588,17 @@
source=dict(terms=dict(field="sources", size=30)),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
deleted_entities=dict(filter=dict(exists=dict(field="*.deleted"))),
identifiedBy_source=dict(
terms=dict(field="*.identifiedBy.source", size=30)
),
),
filters=dict(
type=terms_filter("type"),
source=terms_filter("sources"),
deleted=exists_filter("deleted"),
deleted_entities=exists_filter("*.deleted"),
rero_double=terms_filter("rero.pid"),
identifiedBy_source=terms_filter("*.identifiedBy.source"),
),
),
concepts_rero=dict(
Expand All @@ -595,11 +609,14 @@
terms=dict(field="classification.classificationPortion", size=30)
),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
identifiedBy_source=dict(terms=dict(field="identifiedBy.source", size=30)),
),
filters=dict(
type=terms_filter("type"),
classification=terms_filter("classification.name"),
classificationPortion=terms_filter("classification.classificationPortion"),
deleted=exists_filter("deleted"),
identifiedBy_source=terms_filter("identifiedBy.source"),
),
),
concepts_idref=dict(
Expand All @@ -610,11 +627,14 @@
terms=dict(field="classification.classificationPortion", size=30)
),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
identifiedBy_source=dict(terms=dict(field="identifiedBy.source", size=30)),
),
filters=dict(
type=terms_filter("type"),
classification=terms_filter("classification.name"),
classificationPortion=terms_filter("classification.classificationPortion"),
deleted=exists_filter("deleted"),
identifiedBy_source=terms_filter("identifiedBy.source"),
),
),
places_mef=dict(
Expand All @@ -623,12 +643,16 @@
source=dict(terms=dict(field="sources", size=30)),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
deleted_entities=dict(filter=dict(exists=dict(field="*.deleted"))),
identifiedBy_source=dict(
terms=dict(field="*.identifiedBy.source", size=30)
),
),
filters=dict(
type=terms_filter("type"),
source=terms_filter("sources"),
deleted=exists_filter("deleted"),
deleted_entities=exists_filter("*.deleted"),
identifiedBy_source=terms_filter("*.identifiedBy.source"),
),
),
places_idref=dict(
Expand All @@ -639,11 +663,14 @@
terms=dict(field="classification.classificationPortion", size=30)
),
deleted=dict(filter=dict(exists=dict(field="deleted"))),
identifiedBy_source=dict(terms=dict(field="identifiedBy.source", size=30)),
),
filters=dict(
type=terms_filter("type"),
classification=terms_filter("classification.name"),
classificationPortion=terms_filter("classification.classificationPortion"),
deleted=exists_filter("deleted"),
identifiedBy_source=terms_filter("identifiedBy.source"),
),
),
)
10 changes: 4 additions & 6 deletions rero_mef/jsonschemas/common/agent-v0.0.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,13 @@
"type": {
"enum": [
"bf:Local",
"uri"
"uri",
"bf:Nbn"
]
},
"source": {
"enum": [
"GND",
"RERO",
"IDREF"
]
"type": "string",
"minLength": 2
},
"value": {
"type": "string",
Expand Down
13 changes: 6 additions & 7 deletions rero_mef/jsonschemas/common/concept-v0.0.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,13 @@
"type": {
"enum": [
"bf:Local",
"uri"
"uri",
"bf:Nbn"
]
},
"source": {
"enum": [
"BNF",
"RERO",
"IDREF"
]
"type": "string",
"minLength": 2
},
"value": {
"type": "string",
Expand Down Expand Up @@ -232,7 +230,8 @@
"type": {
"enum": [
"bf:Local",
"uri"
"uri",
"bf:Nbn"
]
},
"source": {
Expand Down
13 changes: 6 additions & 7 deletions rero_mef/jsonschemas/common/place-v0.0.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,13 @@
"type": {
"enum": [
"bf:Local",
"uri"
"uri",
"bf:Nbn"
]
},
"source": {
"enum": [
"BNF",
"RERO",
"IDREF"
]
"type": "string",
"minLength": 2
},
"value": {
"type": "string",
Expand Down Expand Up @@ -231,7 +229,8 @@
"type": {
"enum": [
"bf:Local",
"uri"
"uri",
"bf:Nbn"
]
},
"source": {
Expand Down
21 changes: 20 additions & 1 deletion rero_mef/marctojson/do_idref_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,30 @@ def trans_idref_relation_pid(self):
if self.logger and self.verbose:
self.logger.info("Call Function", "trans_idref_relation_pid")
for field_035 in self.marc.get_fields("035"):
if field_035.get("a") and field_035.get("9") and field_035["9"] == "sudoc":
subfield_a = field_035.get("a")
if isinstance(subfield_a, list):
subfield_a = subfield_a[0]
subfield_2 = field_035.get("2")
if isinstance(subfield_2, list):
subfield_2 = subfield_2[0]
subfield_9 = field_035.get("9")
if isinstance(subfield_9, list):
subfield_9 = subfield_9[0]
if subfield_a and subfield_9 == "sudoc":
self.json_dict["relation_pid"] = {
"value": field_035["a"],
"type": "redirect_from",
}
elif subfield_2:
identified_by = self.json_dict.get("identifiedBy", [])
identified_by.append(
{
"source": subfield_2.upper(),
"type": "uri" if subfield_a.startswith("http") else "bf:Nbn",
"value": subfield_a,
}
)
self.json_dict["identifiedBy"] = identified_by

def trans_idref_gender(self):
"""Transformation gender 120 $a a:female, b: male, -:not known."""
Expand Down
24 changes: 21 additions & 3 deletions rero_mef/marctojson/do_idref_concepts.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,31 @@ def trans_idref_relation_pid(self):
"""Transformation old pids 035 $a $9 = sudoc."""
if self.logger and self.verbose:
self.logger.info("Call Function", "trans_idref_relation_pid")
fields_035 = self.marc.get_fields("035")
for field_035 in fields_035:
if field_035.get("a") and field_035.get("9") and field_035["9"] == "sudoc":
for field_035 in self.marc.get_fields("035"):
subfield_a = field_035.get("a")
if isinstance(subfield_a, list):
subfield_a = subfield_a[0]
subfield_2 = field_035.get("2")
if isinstance(subfield_2, list):
subfield_2 = subfield_2[0]
subfield_9 = field_035.get("9")
if isinstance(subfield_9, list):
subfield_9 = subfield_9[0]
if subfield_a and subfield_9 == "sudoc":
self.json_dict["relation_pid"] = {
"value": field_035["a"],
"type": "redirect_from",
}
elif subfield_2:
identified_by = self.json_dict.get("identifiedBy", [])
identified_by.append(
{
"source": subfield_2.upper(),
"type": "uri" if subfield_a.startswith("http") else "bf:Nbn",
"value": subfield_a,
}
)
self.json_dict["identifiedBy"] = identified_by

def trans_idref_deleted(self):
"""Transformation deleted leader 5 == d."""
Expand Down
24 changes: 21 additions & 3 deletions rero_mef/marctojson/do_idref_places.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,31 @@ def trans_idref_relation_pid(self):
"""Transformation old pids 035 $a $9 = sudoc."""
if self.logger and self.verbose:
self.logger.info("Call Function", "trans_idref_relation_pid")
fields_035 = self.marc.get_fields("035")
for field_035 in fields_035:
if field_035.get("a") and field_035.get("9") and field_035["9"] == "sudoc":
for field_035 in self.marc.get_fields("035"):
subfield_a = field_035.get("a")
if isinstance(subfield_a, list):
subfield_a = subfield_a[0]
subfield_2 = field_035.get("2")
if isinstance(subfield_2, list):
subfield_2 = subfield_2[0]
subfield_9 = field_035.get("9")
if isinstance(subfield_9, list):
subfield_9 = subfield_9[0]
if subfield_a and subfield_9 == "sudoc":
self.json_dict["relation_pid"] = {
"value": field_035["a"],
"type": "redirect_from",
}
elif subfield_2:
identified_by = self.json_dict.get("identifiedBy", [])
identified_by.append(
{
"source": subfield_2.upper(),
"type": "uri" if subfield_a.startswith("http") else "bf:Nbn",
"value": subfield_a,
}
)
self.json_dict["identifiedBy"] = identified_by

def trans_idref_deleted(self):
"""Transformation deleted leader 5 == d."""
Expand Down
2 changes: 1 addition & 1 deletion scripts/test
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ msg=$(safety check -o text ${safety_exceptions}) || {
exit 1
}
info_msg "Test black:"
black tests rero_mef --diff
black tests rero_mef tests --check
info_msg "Test pydocstyle:"
pydocstyle rero_mef tests docs
info_msg "Test isort:"
Expand Down
5 changes: 5 additions & 0 deletions tests/api/test_agents_gnd_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ def test_view_agents_gnd(client, agent_gnd_record):
"sum_other_doc_count": 0,
},
"deleted": {"doc_count": 0},
"identifiedBy_source": {
"buckets": [],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
},
}

url = url_for("invenio_records_rest.aggnd_item", pid_value=pid)
Expand Down
5 changes: 5 additions & 0 deletions tests/api/test_agents_idref_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ def test_view_agents_idref(client, agent_idref_record):
"sum_other_doc_count": 0,
},
"deleted": {"doc_count": 0},
"identifiedBy_source": {
"buckets": [],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
},
}

url = url_for("invenio_records_rest.aidref_item", pid_value=pid)
Expand Down
5 changes: 5 additions & 0 deletions tests/api/test_agents_mef_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ def test_view_agents_mef(
},
"deleted": {"doc_count": 0},
"deleted_entities": {"doc_count": 0},
"identifiedBy_source": {
"buckets": [],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
},
}
url = url_for("invenio_records_rest.mef_item", pid_value=pid)
res = client.get(url)
Expand Down
5 changes: 5 additions & 0 deletions tests/api/test_agents_rero_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ def test_view_agents_rero(client, agent_rero_record):
"sum_other_doc_count": 0,
},
"deleted": {"doc_count": 0},
"identifiedBy_source": {
"buckets": [],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
},
}

url = url_for("invenio_records_rest.agrero_item", pid_value=pid)
Expand Down
5 changes: 5 additions & 0 deletions tests/api/test_concepts_mef_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def test_view_concepts_mef(
assert json_data["aggregations"] == {
"deleted": {"doc_count": 0},
"deleted_entities": {"doc_count": 0},
"identifiedBy_source": {
"buckets": [],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
},
"source": {
"buckets": [
{"doc_count": 1, "key": "idref"},
Expand Down
Loading

0 comments on commit 1f7214e

Please sign in to comment.