Skip to content

Commit

Permalink
Feature/mx 1604 wikidata extract multiple orgs (#223)
Browse files Browse the repository at this point in the history
# PR Context
The total count of search results is required for frontend operations to show
proper pagination. This PR implements an additional function to get just
the total count. Further context is in the related mex-backend PR comment:
robert-koch-institut/mex-backend#91 (comment)

# Added
- get count of found wikidata organizations
  • Loading branch information
mr-kamran-ali authored Jul 16, 2024
1 parent 489c037 commit aad788b
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- get count of found wikidata organizations

### Changes

### Deprecated
Expand Down
36 changes: 36 additions & 0 deletions mex/common/wikidata/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,42 @@ def search_organization_by_label(
return _get_organization_details(wd_item_id)


def get_count_of_found_organizations_by_label(
    item_label: str,
    lang: TextLanguage,
) -> int:
    """Get total count of searched organizations in wikidata.

    The label is embedded in a double-quoted SPARQL string literal. To keep
    that literal well-formed, double quotes are removed from the label and
    backslashes and line breaks are written as SPARQL escape sequences.

    Args:
        item_label: Item title or label to be counted
        lang: language of the label. Example: en, de

    Returns:
        count of found organizations
    """
    connector = WikidataQueryServiceConnector.get()
    # A lone backslash would escape the closing quote of the literal and a raw
    # line break is not allowed inside it, so both are escaped. Double quotes
    # are dropped entirely, which keeps the previous behaviour for them.
    safe_label = item_label.translate(
        {
            ord('"'): None,
            ord("\\"): "\\\\",
            ord("\n"): "\\n",
            ord("\r"): "\\r",
        }
    )
    # NOTE(review): the label service and the ORDER BY clause look unnecessary
    # for a single aggregated count row - confirm before removing them.
    query_string = (
        "SELECT (COUNT(distinct ?item) AS ?count) "
        "WHERE { "
        "SERVICE wikibase:mwapi { "
        'bd:serviceParam wikibase:api "EntitySearch" . '
        'bd:serviceParam wikibase:endpoint "www.wikidata.org" . '
        f'bd:serviceParam mwapi:search "{safe_label}" . '
        f'bd:serviceParam mwapi:language "{lang}" . '
        "?item wikibase:apiOutputItem mwapi:item . "
        "?num wikibase:apiOrdinal true . "
        "} "
        "?item (wdt:P31/wdt:P8225*/wdt:P279*) wd:Q43229. "
        'SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en,de". } '  # noqa: E501
        "} "
        "ORDER BY ASC(?num) "
    )

    result = connector.get_data_by_query(query_string)
    # The count is read from the first result row and converted to an int.
    return int(result[0]["count"]["value"])


def search_organizations_by_label(
item_label: str,
offset: int,
Expand Down
43 changes: 43 additions & 0 deletions tests/wikidata/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
from mex.common.wikidata.extract import (
_get_organization_details,
get_count_of_found_organizations_by_label,
search_organization_by_label,
search_organizations_by_label,
)
Expand Down Expand Up @@ -49,6 +50,16 @@ def test_search_organizations_by_label() -> None:
assert search_result[0].labels.model_dump() == labels


@pytest.mark.integration
def test_get_count_of_found_organizations_by_label() -> None:
    """Check the organization count returned for a known label."""
    # The label carries a trailing double quote as part of the test input.
    organization_count = get_count_of_found_organizations_by_label(
        lang=TextLanguage.EN,
        item_label='Robert Koch Institute"',
    )
    expected_count = 3

    assert organization_count == expected_count


@pytest.mark.integration
def test_search_organization_by_label_for_none() -> None:
"""Test if None is returned when multiple organizations are found."""
Expand Down Expand Up @@ -392,6 +403,38 @@ def mocked_item_details_response() -> Any:
assert search_result[0].model_dump() == expected_organization


@pytest.mark.usefixtures(
    "mocked_session_wikidata_query_service", "mocked_session_wikidata_api"
)
def test_get_count_of_found_organizations_by_label_mocked(
    monkeypatch: MonkeyPatch,
) -> None:
    """Check that the count is read from a patched query response."""
    count_binding = {
        "datatype": "http://www.w3.org/2001/XMLSchema#integer",
        "type": "literal",
        "value": "3",
    }
    canned_rows = [{"count": count_binding}]

    def fake_get_data_by_query(
        self: object, _: str
    ) -> list[dict[str, dict[str, str]]]:
        # Stand-in for the connector method: ignores the query entirely.
        return canned_rows

    monkeypatch.setattr(
        WikidataQueryServiceConnector,
        "get_data_by_query",
        fake_get_data_by_query,
    )

    total = get_count_of_found_organizations_by_label(
        lang=TextLanguage.EN,
        item_label="TEST",
    )

    assert total == 3


@pytest.mark.integration
def test_get_organization_details() -> None:
expected = {
Expand Down

0 comments on commit aad788b

Please sign in to comment.