diff --git a/inspirehep/modules/workflows/tasks/actions.py b/inspirehep/modules/workflows/tasks/actions.py
index 8042aad0ed..008e8f6596 100644
--- a/inspirehep/modules/workflows/tasks/actions.py
+++ b/inspirehep/modules/workflows/tasks/actions.py
@@ -1155,6 +1155,29 @@ def check_if_core_and_uk_in_fulltext(obj, eng):
     return regex.search(fulltext)
 
 
+def check_if_uk_in_raw_affiliations(obj, eng):
+    """Return whether any author raw affiliation mentions the UK.
+
+    Reads ``authors.raw_affiliations.value`` from ``obj.data`` and
+    searches every value, case-insensitively and on word boundaries,
+    for ``UK``, ``United Kingdom``, ``England``, ``Scotland`` or
+    ``Northern Ireland``.
+
+    Args:
+        obj: workflow object whose ``data`` holds the record.
+        eng: workflow engine (unused).
+
+    Returns:
+        bool: ``True`` if at least one value matches, ``False`` otherwise.
+    """
+    raw_affs = get_value(obj.data, 'authors.raw_affiliations.value', [])
+    regex = re.compile(
+        r"\b(UK|United\s+Kingdom|England|Scotland|Northern\s+Ireland)\b",
+        re.UNICODE | re.IGNORECASE,
+    )
+    return any(regex.search(aff) for aff in chain.from_iterable(raw_affs))
+
+
 def load_record_from_hep(obj, wf):
     control_number = obj.data['control_number']
     pid_type = get_pid_type_from_schema(obj.data['$schema'])
diff --git a/inspirehep/modules/workflows/workflows/article.py b/inspirehep/modules/workflows/workflows/article.py
index 650b0afd8b..4c75332f71 100644
--- a/inspirehep/modules/workflows/workflows/article.py
+++ b/inspirehep/modules/workflows/workflows/article.py
@@ -79,7 +79,8 @@
     check_if_germany_in_fulltext,
     check_if_germany_in_raw_affiliations,
     link_institutions_with_affiliations,
-    check_if_core_and_uk_in_fulltext
+    check_if_core_and_uk_in_fulltext,
+    check_if_uk_in_raw_affiliations,
 )
 
 from inspirehep.modules.workflows.tasks.classifier import (
@@ -310,6 +311,15 @@
                     context_factory=curation_ticket_context,
                     ticket_id_key='curation_ticket_id',
                 ),
+            ),
+            IF(
+                check_if_uk_in_raw_affiliations,
+                create_ticket(
+                    template='literaturesuggest/tickets/curation_core.html',
+                    queue='UK_curation',
+                    context_factory=curation_ticket_context,
+                    ticket_id_key='curation_ticket_id',
+                ),
             )
         ]
     )
diff --git a/tests/unit/workflows/test_workflows_actions.py b/tests/unit/workflows/test_workflows_actions.py
index 0e090def9f..fe255ee3dc 100644
--- a/tests/unit/workflows/test_workflows_actions.py
+++ b/tests/unit/workflows/test_workflows_actions.py
@@ -36,7 +36,7 @@ from inspirehep.modules.workflows.actions import MatchApproval, MergeApproval
 
 from mocks import MockEng, MockObj
 
-from inspirehep.modules.workflows.tasks.actions import jlab_ticket_needed, load_from_source_data, \
+from inspirehep.modules.workflows.tasks.actions import check_if_uk_in_raw_affiliations, jlab_ticket_needed, load_from_source_data, \
     extract_authors_from_pdf, is_suitable_for_pdf_authors_extraction, is_fermilab_report, add_collection, \
     check_if_france_in_fulltext, check_if_france_in_raw_affiliations, check_if_germany_in_fulltext, \
     check_if_germany_in_raw_affiliations, check_if_core_and_uk_in_fulltext
@@ -746,3 +746,70 @@ def test_check_if_uk_in_fulltext_core_case_insensitive(mocked_get_document, app)
         obj, eng)
 
     assert uk_in_fulltext_and_core
+
+
+def test_check_if_uk_in_affiliations(app):
+    obj = MagicMock()
+    obj.extra_data = {}
+    obj.data = {
+        'authors': [
+            {"full_name": "author 1",
+             "raw_affiliations": [{"value": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam, 91405, UK"}]
+
+             }
+        ]
+    }
+    result = check_if_uk_in_raw_affiliations(obj, None)
+    assert result
+    obj.data = {
+        'authors': [
+            {"full_name": "author 1",
+             "raw_affiliations": [{"value": "Lorem ipsum dolor united kingdom amet, consetetur sadipscing elitr, sed diam, 91405"}]
+
+             }
+        ]
+    }
+    result = check_if_uk_in_raw_affiliations(obj, None)
+    assert result
+    obj.data = {
+        'authors': [
+            {"full_name": "author 1",
+             "raw_affiliations": [{"value": "Lorem ipsum dolor sit amet, Scotland sadipscing elitr, sed diam, 91405"}]
+
+             }
+        ]
+    }
+    result = check_if_uk_in_raw_affiliations(obj, None)
+    assert result
+    obj.data = {
+        'authors': [
+            {"full_name": "author 1",
+             "raw_affiliations": [{"value": "Lorem engLand dolor sit amet, sadipscing elitr, sed diam, 91405"}]
+
+             }
+        ]
+    }
+    result = check_if_uk_in_raw_affiliations(obj, None)
+    assert result
+    obj.data = {
+        'authors': [
+            {"full_name": "author 1",
+             "raw_affiliations": [{"value": "Lorem ipsum dolor sit amet, Northern ireland, sed diam, 91405"}]
+
+             }
+        ]
+    }
+    result = check_if_uk_in_raw_affiliations(obj, None)
+    assert result
+
+
+def test_check_if_uk_in_affiliations_returns_false_without_match(app):
+    obj = MagicMock()
+    obj.extra_data = {}
+    # "Ukraine" must not be mistaken for the stand-alone "UK" token.
+    obj.data = {
+        'authors': [
+            {"full_name": "author 1",
+             "raw_affiliations": [{"value": "Institute of Physics, Kyiv, Ukraine"}]
+             }
+        ]
+    }
+    result = check_if_uk_in_raw_affiliations(obj, None)
+    assert result is False