From e86e5059d06ed212e882a285823fecc3fdf04c5d Mon Sep 17 00:00:00 2001 From: Sebastian Wolfschmidt Date: Wed, 17 Apr 2024 14:54:42 +0200 Subject: [PATCH 1/2] add test for case where language cannot be detected --- .pre-commit-config.yaml | 2 +- tests/core/test_detect_language.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3af4d8720..fe59b8084 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -51,7 +51,7 @@ repos: args: [ "-L", - "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor", + "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor,fille,ans", ] exclude: '^(poetry\.lock|trace-viewer/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py|src/intelligence_layer/use_cases/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/classify/keyword_extract.py|tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py|tests/use_cases/summarize/very_long_text.txt)$' - repo: https://github.com/akaihola/darglint2 diff --git a/tests/core/test_detect_language.py b/tests/core/test_detect_language.py index cfe9b1980..2d16d73cf 100644 --- a/tests/core/test_detect_language.py +++ b/tests/core/test_detect_language.py @@ -17,3 +17,16 @@ def test_detect_language_returns_correct_language() -> None: output = task.run(input, tracer) assert output.best_fit == Language("en") + + +def test_detect_language_returns_non_if_no_language_can_be_detected() -> None: + text = "Je m’appelle Jessica. Je suis une fille, je suis française et j’ai treize ans." 
# codespell:ignore + task = DetectLanguage() + input = DetectLanguageInput( + text=text, + possible_languages=[Language(lang) for lang in ["en", "de"]], + ) + tracer = NoOpTracer() + output = task.run(input, tracer) + + assert output.best_fit is None From f6ea3134211ceb7be2ca6701da0b295d7c507cd7 Mon Sep 17 00:00:00 2001 From: Sebastian Wolfschmidt Date: Wed, 17 Apr 2024 15:07:32 +0200 Subject: [PATCH 2/2] add parameterized tests for different languages --- tests/core/test_detect_language.py | 36 ++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/core/test_detect_language.py b/tests/core/test_detect_language.py index 2d16d73cf..7c46b94d2 100644 --- a/tests/core/test_detect_language.py +++ b/tests/core/test_detect_language.py @@ -1,3 +1,5 @@ +import pytest + from intelligence_layer.core import ( DetectLanguage, DetectLanguageInput, @@ -6,17 +8,43 @@ ) -def test_detect_language_returns_correct_language() -> None: - text = "Hello, my name is Niklas. I am working with Pit on this language detection piece." +@pytest.mark.parametrize( + "text_input,expected_language", + [ + ( + "Hello, my name is Niklas. I am working with Pit on this language detection piece.", + Language("en"), + ), + ( + "Hola, mi nombre es Niklas. Estoy trabajando con Pit en esta pieza de detección de idioma.", + Language("es"), + ), + ( + "Ciao, mi chiamo Niklas. Sto lavorando con Pit su questo pezzo di rilevamento della lingua.", + Language("it"), + ), + ( + "Hallo, mein Name ist Niklas. Ich arbeite mit Pit an diesem Stück zur Spracherkennung.", + Language("de"), + ), + ( + "Bonjour, je m'appelle Niklas. 
Je travaille avec Pit sur cette pièce de détection de langue.", + Language("fr"), + ), + ], +) +def test_detect_language_returns_correct_language( + text_input: str, expected_language: Language +) -> None: task = DetectLanguage() input = DetectLanguageInput( - text=text, + text=text_input, possible_languages=[Language(lang) for lang in ["en", "de", "fr", "it", "es"]], ) tracer = NoOpTracer() output = task.run(input, tracer) - assert output.best_fit == Language("en") + assert output.best_fit == expected_language def test_detect_language_returns_non_if_no_language_can_be_detected() -> None: