From 7b21f3a1e03f25e1dbc029d003378685a522b845 Mon Sep 17 00:00:00 2001 From: heinpa Date: Tue, 16 Apr 2024 15:36:14 +0200 Subject: [PATCH] update test for created SPARQL INSERT queries Also add ability to specify source language OR use langdetect --- .../component/mt_libretranslate.py | 24 +++++++---- .../pytest.ini | 14 ++++++- .../requirements.txt | 3 +- .../tests/test_mt_libretranslate.py | 40 ++++++++++++------- 4 files changed, 57 insertions(+), 24 deletions(-) diff --git a/qanary-component-MT-Python-LibreTranslate/component/mt_libretranslate.py b/qanary-component-MT-Python-LibreTranslate/component/mt_libretranslate.py index 7f888dcfc..419ac06b0 100644 --- a/qanary-component-MT-Python-LibreTranslate/component/mt_libretranslate.py +++ b/qanary-component-MT-Python-LibreTranslate/component/mt_libretranslate.py @@ -12,13 +12,13 @@ SERVICE_NAME_COMPONENT = os.environ["SERVICE_NAME_COMPONENT"] - -target_lang = 'en' +SOURCE_LANG = os.environ["SOURCE_LANGUAGE"] +#TARGET_LANG = os.environ["TARGET_LANGUAGE"] +TARGET_LANG = "en" # currently only supports English TRANSLATE_ENDPOINT = os.environ["TRANSLATE_ENDPOINT"] LANGUAGES_ENDPOINT = os.environ["LANGUAGES_ENDPOINT"] - @mt_libretranslate_bp.route("/annotatequestion", methods=["POST"]) def qanary_service(): """the POST endpoint required for a Qanary service""" @@ -29,18 +29,26 @@ def qanary_service(): logging.info("endpoint: %s, inGraph: %s, outGraph: %s" % \ (triplestore_endpoint, triplestore_ingraph, triplestore_outgraph)) - text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint, + text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint, graph=triplestore_ingraph)[0]["text"] question_uri = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint, graph=triplestore_ingraph)[0]["uri"] logging.info(f"Question text: {text}") + if SOURCE_LANG != None and len(SOURCE_LANG.strip()) > 0: + lang = SOURCE_LANG + logging.info("Using custom SOURCE_LANGUAGE") + else: + lang = detect(text) + logging.info("No SOURCE_LANGUAGE specified, using langdetect!") + logging.info(f"source language: {lang}") + #lang, prob = langid.classify(text) lang = detect(text) logging.info(f"source language: {lang}") ## TODO: MAIN FUNCTIONALITY - result, _ = translate_input(text, lang) + result, _ = translate_input(text, lang, TARGET_LANG) # building SPARQL query TODO: verify this annotation AnnotationOfQuestionTranslation ?? SPARQLqueryAnnotationOfQuestionTranslation = """ @@ -112,7 +120,7 @@ def index(): return "Python MT LibreTranslate Qanary component" -def translate_input(text, source_lang): +def translate_input(text, source_lang, target_lang): req_json = { 'q': text, @@ -135,11 +143,11 @@ def check_connection(): success = "The test translation was successful" try: # TODO: test with supported language? - t, error = translate_input("eingabe zum testen", "de") + t, error = translate_input("eingabe zum testen", "de", "en") logging.info(f"got translation: {t}") assert len(t) > 0 return True, success - except Exception: + except Exception: logging.info(f"test failed with {error}") return False, error diff --git a/qanary-component-MT-Python-LibreTranslate/pytest.ini b/qanary-component-MT-Python-LibreTranslate/pytest.ini index e610b3c2a..3f65a4ac2 100644 --- a/qanary-component-MT-Python-LibreTranslate/pytest.ini +++ b/qanary-component-MT-Python-LibreTranslate/pytest.ini @@ -1,3 +1,15 @@ [pytest] -log_cli = True +log_cli = 1 log_cli_level = INFO +log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s +log_cli_date_format=%Y-%m-%d %H:%M:%S +env = + SERVER_PORT=40120 + SPRING_BOOT_ADMIN_URL=http://qanary-pipeline-host:40111 + SERVER_HOST=http://public-component-host + SPRING_BOOT_ADMIN_USERNAME=admin + SPRING_BOOT_ADMIN_PASSWORD=admin + SERVICE_NAME_COMPONENT=LibreTranslate + TRANSLATE_ENDPOINT=http://localhost:5000/translate + LANGUAGES_ENDPOINT=http://localhost:5000/languages + SOURCE_LANGUAGE= diff --git a/qanary-component-MT-Python-LibreTranslate/requirements.txt b/qanary-component-MT-Python-LibreTranslate/requirements.txt index eb68c9274..80eaccc2d 100644 --- a/qanary-component-MT-Python-LibreTranslate/requirements.txt +++ b/qanary-component-MT-Python-LibreTranslate/requirements.txt @@ -1,4 +1,4 @@ -Flask==2.2.2 +Flask langdetect==1.0.9 langid==1.1.6 mock==3.0.5 @@ -6,3 +6,4 @@ python-dotenv==0.21.1 qanary_helpers==0.2.2 gunicorn==20.1.0 pytest +pytest-env diff --git a/qanary-component-MT-Python-LibreTranslate/tests/test_mt_libretranslate.py b/qanary-component-MT-Python-LibreTranslate/tests/test_mt_libretranslate.py index 2a363db84..2bb6dcfbf 100644 --- a/qanary-component-MT-Python-LibreTranslate/tests/test_mt_libretranslate.py +++ b/qanary-component-MT-Python-LibreTranslate/tests/test_mt_libretranslate.py @@ -9,12 +9,13 @@ class TestComponent(TestCase): logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) - questions = list([{"uri": "urn:test-uri", "text": "test_input"}]) + questions = list([{"uri": "urn:test-uri", "text": "was ist ein Test?"}]) endpoint = "urn:qanary#test-endpoint" in_graph = "urn:qanary#test-inGraph" out_graph = "urn:qanary#test-outGraph" - source_language = "en" + source_language = "de" + target_language = "en" test_translation_placeholder = "test_translation" @@ -49,21 +50,32 @@ def test_qanary_service(self): # when a call to /annotatequestion is made response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data) - # then - # the text question is retrieved from the triplestore + # then the text question is retrieved from the triplestore mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph) - # new information is pushed to the triplestore - mocked_insert_into_triplestore.assert_called() + # get arguments of the (2) separate insert calls + arg_list = mocked_insert_into_triplestore.call_args_list + # get the call arguments for question translation + call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]] + assert len(call_args_translation) == 1 + # get the call arguments for question language + call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]] + assert len(call_args_language) == 1 - args = mocked_insert_into_triplestore.call_args.args - query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1]) + # clean query strings + query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1]) + query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1]) - # the source language is correctly identified and annotated - self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.") - # the question is translated and the result is annotated - assert self.test_translation_placeholder in query_stored.lower() + # then the triplestore is updated twice + # (question language and translation) + assert mocked_insert_into_triplestore.call_count == 2 - # the response is not empty - assert response_json != None + # then the source language is correctly identified and annotated + self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.") + + # then the question is translated and the result is annotated + self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.") + assert "@"+self.target_language in query_translation.lower() + # then the response is not empty + assert response_json != None