Skip to content

Commit

Permalink
update test for created SPARQL INSERT queries
Browse files (browse the repository at this point in the history)
Also add ability to specify source language OR use langdetect
  • Loading branch information
heinpa committed Apr 16, 2024
1 parent 0f96829 commit 7b21f3a
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@

SERVICE_NAME_COMPONENT = os.environ["SERVICE_NAME_COMPONENT"]


target_lang = 'en'
SOURCE_LANG = os.environ["SOURCE_LANGUAGE"]
#TARGET_LANG = os.environ["TARGET_LANGUAGE"]
TARGET_LANG = "en" # currently only supports English
TRANSLATE_ENDPOINT = os.environ["TRANSLATE_ENDPOINT"]
LANGUAGES_ENDPOINT = os.environ["LANGUAGES_ENDPOINT"]



@mt_libretranslate_bp.route("/annotatequestion", methods=["POST"])
def qanary_service():
"""the POST endpoint required for a Qanary service"""
Expand All @@ -29,18 +29,26 @@ def qanary_service():
logging.info("endpoint: %s, inGraph: %s, outGraph: %s" % \
(triplestore_endpoint, triplestore_ingraph, triplestore_outgraph))

text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
graph=triplestore_ingraph)[0]["text"]
question_uri = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
graph=triplestore_ingraph)[0]["uri"]
logging.info(f"Question text: {text}")

if SOURCE_LANG != None and len(SOURCE_LANG.strip()) > 0:
lang = SOURCE_LANG
logging.info("Using custom SOURCE_LANGUAGE")
else:
lang = detect(text)
logging.info("No SOURCE_LANGUAGE specified, using langdetect!")
logging.info(f"source language: {lang}")

#lang, prob = langid.classify(text)
lang = detect(text)
logging.info(f"source language: {lang}")

## TODO: MAIN FUNCTIONALITY
result, _ = translate_input(text, lang)
result, _ = translate_input(text, lang, TARGET_LANG)

# building SPARQL query TODO: verify this annotation AnnotationOfQuestionTranslation ??
SPARQLqueryAnnotationOfQuestionTranslation = """
Expand Down Expand Up @@ -112,7 +120,7 @@ def index():
return "Python MT LibreTranslate Qanary component"


def translate_input(text, source_lang):
def translate_input(text, source_lang, target_lang):

req_json = {
'q': text,
Expand All @@ -135,11 +143,11 @@ def check_connection():
success = "The test translation was successful"
try:
# TODO: test with supported language?
t, error = translate_input("eingabe zum testen", "de")
t, error = translate_input("eingabe zum testen", "de", "en")
logging.info(f"got translation: {t}")
assert len(t) > 0
return True, success
except Exception:
except Exception:
logging.info(f"test failed with {error}")
return False, error

Expand Down
14 changes: 13 additions & 1 deletion qanary-component-MT-Python-LibreTranslate/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
[pytest]
log_cli = True
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s
log_cli_date_format=%Y-%m-%d %H:%M:%S
env =
SERVER_PORT=40120
SPRING_BOOT_ADMIN_URL=http://qanary-pipeline-host:40111
SERVER_HOST=http://public-component-host
SPRING_BOOT_ADMIN_USERNAME=admin
SPRING_BOOT_ADMIN_PASSWORD=admin
SERVICE_NAME_COMPONENT=LibreTranslate
TRANSLATE_ENDPOINT=http://localhost:5000/translate
LANGUAGES_ENDPOINT=http://localhost:5000/languages
SOURCE_LANGUAGE=
3 changes: 2 additions & 1 deletion qanary-component-MT-Python-LibreTranslate/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
Flask==2.2.2
Flask
langdetect==1.0.9
langid==1.1.6
mock==3.0.5
python-dotenv==0.21.1
qanary_helpers==0.2.2
gunicorn==20.1.0
pytest
pytest-env
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ class TestComponent(TestCase):

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)

questions = list([{"uri": "urn:test-uri", "text": "test_input"}])
questions = list([{"uri": "urn:test-uri", "text": "was ist ein Test?"}])
endpoint = "urn:qanary#test-endpoint"
in_graph = "urn:qanary#test-inGraph"
out_graph = "urn:qanary#test-outGraph"

source_language = "en"
source_language = "de"
target_language = "en"

test_translation_placeholder = "test_translation"

Expand Down Expand Up @@ -49,21 +50,32 @@ def test_qanary_service(self):
# when a call to /annotatequestion is made
response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)

# then
# the text question is retrieved from the triplestore
# then the text question is retrieved from the triplestore
mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)

# new information is pushed to the triplestore
mocked_insert_into_triplestore.assert_called()
# get arguments of the (2) separate insert calls
arg_list = mocked_insert_into_triplestore.call_args_list
# get the call arguments for question translation
call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
assert len(call_args_translation) == 1
# get the call arguments for question language
call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
assert len(call_args_language) == 1

args = mocked_insert_into_triplestore.call_args.args
query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
# clean query strings
query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])

# the source language is correctly identified and annotated
self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
# the question is translated and the result is annotated
assert self.test_translation_placeholder in query_stored.lower()
# then the triplestore is updated twice
# (question language and translation)
assert mocked_insert_into_triplestore.call_count == 2

# the response is not empty
assert response_json != None
# then the source language is correctly identified and annotated
self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")

# then the question is translated and the result is annotated
self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
assert "@"+self.target_language in query_translation.lower()

# then the response is not empty
assert response_json != None

0 comments on commit 7b21f3a

Please sign in to comment.