Skip to content

Commit

Permalink
fix fix_mediaformat command (#228)
Browse files Browse the repository at this point in the history
Allow setting any media type code on the items matched by a query.

CPCN-803
  • Loading branch information
petrjasek authored Dec 20, 2024
1 parent cbd2956 commit 4643b01
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 16 deletions.
2 changes: 1 addition & 1 deletion server/cp/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from . import fix_language # noqa
from . import fix_mediaformat # noqa
from . import fix_mediaformat # noqa
6 changes: 4 additions & 2 deletions server/cp/commands/fix_language.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

import time

from superdesk import get_resource_service
Expand All @@ -9,7 +8,10 @@
def fix_language(resource="items", limit=50, sleep_secs=2):
service = get_resource_service(resource)

source = {"query": {"terms": {"language": ["en-CA", "en-US", "fr-CA"]}}, "size": 100}
source = {
"query": {"terms": {"language": ["en-CA", "en-US", "fr-CA"]}},
"size": 100,
}

for i in range(int(limit)):
items = service.search(source)
Expand Down
36 changes: 27 additions & 9 deletions server/cp/commands/fix_mediaformat.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,53 @@
import time

from typing import get_args
from superdesk import get_resource_service
from cp.signals import get_media_type_name, get_media_type_scheme
from cp.signals import get_media_type_name, get_media_type_scheme, MediaType
from newsroom.commands.manager import manager


@manager.command
def fix_mediaformat(
    resource="items", query="", code="wireaudio", limit=500, sleep_secs=2, dry_run=False
):
    """Set the media type subject on every item matched by ``query``.

    For each matching item, any existing subject entry that belongs to the
    media type scheme is dropped and a single entry for ``code`` is appended.
    The entry's name is looked up from ``code`` and the item's language.

    Args:
        resource: Name of the resource service to search and update.
        query: Query-string expression selecting the items to fix. The
            command prints a message and returns if it is empty.
        code: Media type code to set. It must be one of the values listed
            by ``MediaType``; otherwise the command prints a message and
            returns.
        limit: Maximum number of items to process in total.
        sleep_secs: Seconds to pause after each page of results.
        dry_run: When true, print the intended updates instead of saving.
    """
    if not query:
        print("Please provide a query to filter the items.")
        return
    # NOTE(review): relies on MediaType being a typing construct (e.g. a
    # Literal) whose arguments are the valid codes - confirm in cp.signals.
    if code not in get_args(MediaType):
        print("Invalid media type code.")
        return

    service = get_resource_service(resource)
    media_type_scheme = get_media_type_scheme()
    max_items = int(limit)
    pause = int(sleep_secs)
    page_size = 100
    source = {
        "query": {
            "bool": {"must": {"query_string": {"query": query}}},
        },
        "size": page_size,
        "from": 0,
    }

    # NOTE(review): paging by offset assumes updating an item does not change
    # whether it matches ``query``; a query on the media type subject itself
    # would shift later pages - verify before using such a query.
    for offset in range(0, max_items, page_size):
        source["from"] = offset
        # Cap the last page so that no more than ``limit`` items are touched.
        source["size"] = min(page_size, max_items - offset)
        items = list(service.search(source))
        if not items:
            break
        for item in items:
            # Keep every subject outside the media type scheme, then add
            # exactly one entry for the requested code.
            subjects = [
                s
                for s in item.get("subject") or []
                if s.get("scheme") != media_type_scheme
            ]
            subjects.append(
                dict(
                    code=code,
                    name=get_media_type_name(code, item.get("language")),
                    scheme=media_type_scheme,
                )
            )
            updates = {"subject": subjects}
            if dry_run:
                print("Would update", item["_id"], "with", updates)
            else:
                print("Updating", item["_id"])
                service.system_update(item["_id"], updates, item)
        print(".", end="", flush=True)
        time.sleep(pause)
    print("done.")
16 changes: 12 additions & 4 deletions server/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ def test_fix_mediaformat(app):
app.data.insert(
"items",
[
{"_id": "en", "language": "en", "type": "text"},
{"_id": "fr", "language": "fr", "type": "text"},
{"_id": "en", "language": "en", "type": "text", "headline": "foo"},
{"_id": "fr", "language": "fr", "type": "text", "headline": "bar"},
],
)

fix_mediaformat()
fix_mediaformat(query="headline:foo", code="wiretext", sleep_secs=0)

en_item = app.data.find_one("items", req=None, _id="en")
assert "subject" in en_item
Expand All @@ -20,4 +20,12 @@ def test_fix_mediaformat(app):
assert "mediaformat" == en_item["subject"][0]["scheme"]

fr_item = app.data.find_one("items", req=None, _id="fr")
assert "Texte fil de presse" == fr_item["subject"][0]["name"]
assert "subject" not in fr_item, "Should not add subject to non-matching item"

fix_mediaformat(query="headline:bar", code="wireaudio", sleep_secs=0)

fr_item = app.data.find_one("items", req=None, _id="fr")
assert "subject" in fr_item
assert "wireaudio" == fr_item["subject"][0]["code"]
assert "Audio fil de presse" == fr_item["subject"][0]["name"]
assert "mediaformat" == fr_item["subject"][0]["scheme"]

0 comments on commit 4643b01

Please sign in to comment.