From a877822c6e1161bfde2976225b923cec0efa3be2 Mon Sep 17 00:00:00 2001 From: SebastianWolfschmidtAA <162735941+SebastianWolfschmidtAA@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:07:31 +0200 Subject: [PATCH] F13 136 il add more tests for language detection task (#748) * add test for case where language cannot be detected * add parameterized test for different languages --- .pre-commit-config.yaml | 2 +- tests/core/test_detect_language.py | 49 +++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3af4d8720..fe59b8084 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -51,7 +51,7 @@ repos: args: [ "-L", - "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor", + "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor,fille,ans", ] exclude: '^(poetry\.lock|trace-viewer/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py|src/intelligence_layer/use_cases/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/classify/keyword_extract.py|tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py|tests/use_cases/summarize/very_long_text.txt)$' - repo: https://github.com/akaihola/darglint2 diff --git a/tests/core/test_detect_language.py b/tests/core/test_detect_language.py index cfe9b1980..7c46b94d2 100644 --- a/tests/core/test_detect_language.py +++ b/tests/core/test_detect_language.py @@ -1,3 +1,5 @@ +import pytest + from intelligence_layer.core import ( DetectLanguage, DetectLanguageInput, @@ -6,14 +8,53 @@ ) -def test_detect_language_returns_correct_language() -> None: - text = "Hello, my name is Niklas. I am working with Pit on this language detection piece." +@pytest.mark.parametrize( + "text_input,expected_language", + [ + ( + "Hello, my name is Niklas. I am working with Pit on this language detection piece.", + Language("en"), + ), + ( + "Hola, mi nombre es Niklas. Estoy trabajando con Pit en esta pieza de detección de idioma.", + Language("es"), + ), + ( + "Ciao, mi chiamo Niklas. Sto lavorando con Pit su questo pezzo di rilevamento della lingua.", + Language("it"), + ), + ( + "Hallo, mein Name ist Niklas. Ich arbeite mit Pit an diesem Stück zur Spracherkennung.", + Language("de"), + ), + ( + "Bonjour, je m'appelle Niklas. Je travaille avec Pit sur cette pièce de détection de langue.", + Language("fr"), + ), + ], +) +def test_detect_language_returns_correct_language( + text_input: str, expected_language: Language +) -> None: task = DetectLanguage() input = DetectLanguageInput( - text=text, + text=text_input, possible_languages=[Language(lang) for lang in ["en", "de", "fr", "it", "es"]], ) tracer = NoOpTracer() output = task.run(input, tracer) - assert output.best_fit == Language("en") + assert output.best_fit == expected_language + + +def test_detect_language_returns_non_if_no_language_can_be_detected() -> None: + text = "Je m’appelle Jessica. Je suis une fille, je suis française et j’ai treize ans." # codespell:ignore + task = DetectLanguage() + input = DetectLanguageInput( + text=text, + possible_languages=[Language(lang) for lang in ["en", "de"]], + ) + tracer = NoOpTracer() + output = task.run(input, tracer) + + assert output.best_fit is None