From 4643b0171f37c1733438c6f7ee98eb0c7206fad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Ja=C5=A1ek?= Date: Fri, 20 Dec 2024 12:51:12 +0100 Subject: [PATCH] fix fix_mediaformat command (#228) allow to set any code based on a query CPCN-803 --- server/cp/commands/__init__.py | 2 +- server/cp/commands/fix_language.py | 6 +++-- server/cp/commands/fix_mediaformat.py | 36 ++++++++++++++++++++------- server/tests/test_commands.py | 16 +++++++++--- 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/server/cp/commands/__init__.py b/server/cp/commands/__init__.py index 574c052..f439a0a 100644 --- a/server/cp/commands/__init__.py +++ b/server/cp/commands/__init__.py @@ -1,2 +1,2 @@ from . import fix_language # noqa -from . import fix_mediaformat # noqa \ No newline at end of file +from . import fix_mediaformat # noqa diff --git a/server/cp/commands/fix_language.py b/server/cp/commands/fix_language.py index 25c01be..d83b106 100644 --- a/server/cp/commands/fix_language.py +++ b/server/cp/commands/fix_language.py @@ -1,4 +1,3 @@ - import time from superdesk import get_resource_service @@ -9,7 +8,10 @@ def fix_language(resource="items", limit=50, sleep_secs=2): service = get_resource_service(resource) - source = {"query": {"terms": {"language": ["en-CA", "en-US", "fr-CA"]}}, "size": 100} + source = { + "query": {"terms": {"language": ["en-CA", "en-US", "fr-CA"]}}, + "size": 100, + } for i in range(int(limit)): items = service.search(source) diff --git a/server/cp/commands/fix_mediaformat.py b/server/cp/commands/fix_mediaformat.py index 76fd985..e5ee5ee 100644 --- a/server/cp/commands/fix_mediaformat.py +++ b/server/cp/commands/fix_mediaformat.py @@ -1,35 +1,53 @@ import time +from typing import get_args from superdesk import get_resource_service -from cp.signals import get_media_type_name, get_media_type_scheme +from cp.signals import get_media_type_name, get_media_type_scheme, MediaType from newsroom.commands.manager import manager @manager.command -def fix_mediaformat(resource="items", limit=500, sleep_secs=2): +def fix_mediaformat( + resource="items", query="", code="wireaudio", limit=500, sleep_secs=2, dry_run=False +): + if not query: + print("Please provide a query to filter the items.") + return + if code not in get_args(MediaType): + print("Invalid media type code.") + return service = get_resource_service(resource) media_type_scheme = get_media_type_scheme() source = { "query": { - "bool": {"must_not": {"term": {"subject.scheme": media_type_scheme}}} + "bool": {"must": {"query_string": {"query": query}}}, }, "size": 100, + "from": 0, } - for i in range(int(limit)): - items = service.search(source) - if not items.count(): + for i in range(0, int(limit), source["size"]): + source["from"] = i + items = list(service.search(source)) + if not len(items): break for item in items: updates = {"subject": item["subject"].copy() if item.get("subject") else []} + updates["subject"] = [ + s for s in updates["subject"] if s.get("scheme") != media_type_scheme + ] updates["subject"].append( dict( - code="wiretext", - name=get_media_type_name("wiretext", item.get("language")), + code=code, + name=get_media_type_name(code, item.get("language")), scheme=media_type_scheme, ) ) - service.system_update(item["_id"], updates, item) + if dry_run: + print("Would update", item["_id"], "with", updates) + else: + print("Updating", item["_id"]) + service.system_update(item["_id"], updates, item) print(".", end="", flush=True) time.sleep(int(sleep_secs)) print("done.") diff --git a/server/tests/test_commands.py b/server/tests/test_commands.py index fc96596..b7f9126 100644 --- a/server/tests/test_commands.py +++ b/server/tests/test_commands.py @@ -5,12 +5,12 @@ def test_fix_mediaformat(app): app.data.insert( "items", [ - {"_id": "en", "language": "en", "type": "text"}, - {"_id": "fr", "language": "fr", "type": "text"}, + {"_id": "en", "language": "en", "type": "text", "headline": "foo"}, + {"_id": "fr", "language": "fr", "type": "text", "headline": "bar"}, ], ) - fix_mediaformat() + fix_mediaformat(query="headline:foo", code="wiretext", sleep_secs=0) en_item = app.data.find_one("items", req=None, _id="en") assert "subject" in en_item @@ -20,4 +20,12 @@ def test_fix_mediaformat(app): assert "mediaformat" == en_item["subject"][0]["scheme"] fr_item = app.data.find_one("items", req=None, _id="fr") - assert "Texte fil de presse" == fr_item["subject"][0]["name"] + assert "subject" not in fr_item, "Should not add subject to non-matching item" + + fix_mediaformat(query="headline:bar", code="wireaudio", sleep_secs=0) + + fr_item = app.data.find_one("items", req=None, _id="fr") + assert "subject" in fr_item + assert "wireaudio" == fr_item["subject"][0]["code"] + assert "Audio fil de presse" == fr_item["subject"][0]["name"] + assert "mediaformat" == fr_item["subject"][0]["scheme"]