Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Python MT component tests #2

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion qanary-component-MT-Python-HelsinkiNLP/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[pytest]
log_cli = True
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s
log_cli_date_format=%Y-%m-%d %H:%M:%S
env =
SPRING_BOOT_ADMIN_URL=https://localhost:43740
SPRING_BOOT_ADMIN_USERNAME=admin
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from unittest.mock import patch
import re
from unittest import TestCase
#import pytest

class TestComponent(TestCase):

Expand All @@ -15,16 +14,16 @@ class TestComponent(TestCase):
out_graph = "urn:qanary#test-outGraph"

source_language = "de"
question_translation = "what is a test?"
target_language = "en"

request_data = '''{
"values": {
"urn:qanary#endpoint": "urn:qanary#test-endpoint",
"urn:qanary#inGraph": "urn:qanary#test-inGraph",
"urn:qanary#endpoint": "urn:qanary#test-endpoint",
"urn:qanary#inGraph": "urn:qanary#test-inGraph",
"urn:qanary#outGraph": "urn:qanary#test-outGraph"
},
"endpoint": "urn:qanary#test-endpoint",
"inGraph": "urn:qanary#test-inGraph",
"endpoint": "urn:qanary#test-endpoint",
"inGraph": "urn:qanary#test-inGraph",
"outGrpah": "urn:qanary#test-outGraph"
}'''

Expand All @@ -49,21 +48,32 @@ def test_qanary_service(self):
# when a call to /annotatequestion is made
response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)

# then
# the text question is retrieved from the triplestore
# then the text question is retrieved from the triplestore
mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)

# new information is pushed to the triplestore
mocked_insert_into_triplestore.assert_called()
# get arguments of the (2) separate insert calls
arg_list = mocked_insert_into_triplestore.call_args_list
# get the call arguments for question translation
call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
assert len(call_args_translation) == 1
# get the call arguments for question language
call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
assert len(call_args_language) == 1

args = mocked_insert_into_triplestore.call_args.args
query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
# clean query strings
query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])

# the source language is correctly identified and annotated
self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
# the question is translated and the result is annotated
assert self.question_translation in query_stored
# then the triplestore is updated twice
# (question language and translation)
assert mocked_insert_into_triplestore.call_count == 2

# the response is not empty
assert response_json != None
# then the source language is correctly identified and annotated
self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")

# then the question is translated and the result is annotated
self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
assert "@"+self.target_language in query_translation.lower()

# then the response is not empty
assert response_json != None
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@

SERVICE_NAME_COMPONENT = os.environ["SERVICE_NAME_COMPONENT"]


target_lang = 'en'
SOURCE_LANG = os.environ["SOURCE_LANGUAGE"]
#TARGET_LANG = os.environ["TARGET_LANGUAGE"]
TARGET_LANG = "en" # currently only supports English
TRANSLATE_ENDPOINT = os.environ["TRANSLATE_ENDPOINT"]
LANGUAGES_ENDPOINT = os.environ["LANGUAGES_ENDPOINT"]



@mt_libretranslate_bp.route("/annotatequestion", methods=["POST"])
def qanary_service():
"""the POST endpoint required for a Qanary service"""
Expand All @@ -29,18 +29,26 @@ def qanary_service():
logging.info("endpoint: %s, inGraph: %s, outGraph: %s" % \
(triplestore_endpoint, triplestore_ingraph, triplestore_outgraph))

text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
graph=triplestore_ingraph)[0]["text"]
question_uri = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
graph=triplestore_ingraph)[0]["uri"]
logging.info(f"Question text: {text}")

if SOURCE_LANG != None and len(SOURCE_LANG.strip()) > 0:
lang = SOURCE_LANG
logging.info("Using custom SOURCE_LANGUAGE")
else:
lang = detect(text)
logging.info("No SOURCE_LANGUAGE specified, using langdetect!")
logging.info(f"source language: {lang}")

#lang, prob = langid.classify(text)
lang = detect(text)
logging.info(f"source language: {lang}")

## TODO: MAIN FUNCTIONALITY
result, _ = translate_input(text, lang)
result, _ = translate_input(text, lang, TARGET_LANG)

# building SPARQL query (TODO: verify that AnnotationOfQuestionTranslation is the correct annotation type here)
SPARQLqueryAnnotationOfQuestionTranslation = """
Expand Down Expand Up @@ -112,7 +120,7 @@ def index():
return "Python MT LibreTranslate Qanary component"


def translate_input(text, source_lang):
def translate_input(text, source_lang, target_lang):

req_json = {
'q': text,
Expand All @@ -135,11 +143,11 @@ def check_connection():
success = "The test translation was successful"
try:
# TODO: test with supported language?
t, error = translate_input("eingabe zum testen", "de")
t, error = translate_input("eingabe zum testen", "de", "en")
logging.info(f"got translation: {t}")
assert len(t) > 0
return True, success
except Exception:
except Exception:
logging.info(f"test failed with {error}")
return False, error

Expand Down
14 changes: 13 additions & 1 deletion qanary-component-MT-Python-LibreTranslate/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
[pytest]
log_cli = True
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s
log_cli_date_format=%Y-%m-%d %H:%M:%S
env =
SERVER_PORT=40120
SPRING_BOOT_ADMIN_URL=http://qanary-pipeline-host:40111
SERVER_HOST=http://public-component-host
SPRING_BOOT_ADMIN_USERNAME=admin
SPRING_BOOT_ADMIN_PASSWORD=admin
SERVICE_NAME_COMPONENT=LibreTranslate
TRANSLATE_ENDPOINT=http://localhost:5000/translate
LANGUAGES_ENDPOINT=http://localhost:5000/languages
SOURCE_LANGUAGE=
3 changes: 2 additions & 1 deletion qanary-component-MT-Python-LibreTranslate/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
Flask==2.2.2
Flask
langdetect==1.0.9
langid==1.1.6
mock==3.0.5
python-dotenv==0.21.1
qanary_helpers==0.2.2
gunicorn==20.1.0
pytest
pytest-env
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ class TestComponent(TestCase):

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)

questions = list([{"uri": "urn:test-uri", "text": "test_input"}])
questions = list([{"uri": "urn:test-uri", "text": "was ist ein Test?"}])
endpoint = "urn:qanary#test-endpoint"
in_graph = "urn:qanary#test-inGraph"
out_graph = "urn:qanary#test-outGraph"

source_language = "en"
source_language = "de"
target_language = "en"

test_translation_placeholder = "test_translation"

Expand Down Expand Up @@ -49,21 +50,32 @@ def test_qanary_service(self):
# when a call to /annotatequestion is made
response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)

# then
# the text question is retrieved from the triplestore
# then the text question is retrieved from the triplestore
mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)

# new information is pushed to the triplestore
mocked_insert_into_triplestore.assert_called()
# get arguments of the (2) separate insert calls
arg_list = mocked_insert_into_triplestore.call_args_list
# get the call arguments for question translation
call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
assert len(call_args_translation) == 1
# get the call arguments for question language
call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
assert len(call_args_language) == 1

args = mocked_insert_into_triplestore.call_args.args
query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
# clean query strings
query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])

# the source language is correctly identified and annotated
self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
# the question is translated and the result is annotated
assert self.test_translation_placeholder in query_stored.lower()
# then the triplestore is updated twice
# (question language and translation)
assert mocked_insert_into_triplestore.call_count == 2

# the response is not empty
assert response_json != None
# then the source language is correctly identified and annotated
self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")

# then the question is translated and the result is annotated
self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
assert "@"+self.target_language in query_translation.lower()

# then the response is not empty
assert response_json != None
4 changes: 3 additions & 1 deletion qanary-component-MT-Python-MBart/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[pytest]
log_cli = True
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s
log_cli_date_format=%Y-%m-%d %H:%M:%S
env =
SERVER_PORT=40120
SPRING_BOOT_ADMIN_URL=http://qanary-pipeline-host:40111
Expand Down
45 changes: 28 additions & 17 deletions qanary-component-MT-Python-MBart/tests/test_mt_mbart_nlp.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from component.mt_mbart_nlp import *
from component import app
from unittest.mock import patch
import mock
import re
from unittest import TestCase

Expand All @@ -16,16 +15,16 @@ class TestComponent(TestCase):
out_graph = "urn:qanary#test-outGraph"

source_language = "de"
question_translation = "what is a test?"
target_language = "en"

request_data = '''{
"values": {
"urn:qanary#endpoint": "urn:qanary#test-endpoint",
"urn:qanary#inGraph": "urn:qanary#test-inGraph",
"urn:qanary#endpoint": "urn:qanary#test-endpoint",
"urn:qanary#inGraph": "urn:qanary#test-inGraph",
"urn:qanary#outGraph": "urn:qanary#test-outGraph"
},
"endpoint": "urn:qanary#test-endpoint",
"inGraph": "urn:qanary#test-inGraph",
"endpoint": "urn:qanary#test-endpoint",
"inGraph": "urn:qanary#test-inGraph",
"outGrpah": "urn:qanary#test-outGraph"
}'''

Expand All @@ -47,20 +46,32 @@ def test_qanary_service(self):
# when a call to /annotatequestion is made
response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)

# then
# the text question is retrieved from the triplestore
# then the text question is retrieved from the triplestore
mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)

# new information is pushed to the triplestore
mocked_insert_into_triplestore.assert_called()
# get arguments of the (2) separate insert calls
arg_list = mocked_insert_into_triplestore.call_args_list
# get the call arguments for question translation
call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
assert len(call_args_translation) == 1
# get the call arguments for question language
call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
assert len(call_args_language) == 1

args = mocked_insert_into_triplestore.call_args.args
query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
# clean query strings
query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])

# the source language is correctly identified and annotated
self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
# the question is translated and the result is annotated
assert self.question_translation in query_stored.lower()
# then the triplestore is updated twice
# (question language and translation)
assert mocked_insert_into_triplestore.call_count == 2

# the response is not empty
# then the source language is correctly identified and annotated
self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")

# then the question is translated and the result is annotated
self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
assert "@"+self.target_language in query_translation.lower()

# then the response is not empty
assert response_json != None
2 changes: 1 addition & 1 deletion qanary-component-MT-Python-NLLB/component/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from component.mt_nllb import mt_nllb_bp
from flask import Flask

version = "0.1.2"
version = "0.1.3"

# default config file
configfile = "app.conf"
Expand Down
Loading
Loading