diff --git a/deep_translator/__init__.py b/deep_translator/__init__.py index 2b0c9e0..839ced6 100644 --- a/deep_translator/__init__.py +++ b/deep_translator/__init__.py @@ -16,6 +16,8 @@ from deep_translator.qcri import QcriTranslator from deep_translator.tencent import TencentTranslator from deep_translator.yandex import YandexTranslator +from deep_translator.glosbe import GlosbeTranslator +from deep_translator.mint import WikimediaMinTMachineTranslator __author__ = """Nidhal Baccouri""" __email__ = "nidhalbacc@gmail.com" @@ -35,6 +37,8 @@ "ChatGptTranslator", "TencentTranslator", "BaiduTranslator", + "GlosbeTranslator", + "WikimediaMinTMachineTranslator", "single_detection", "batch_detection", ] diff --git a/deep_translator/constants.py b/deep_translator/constants.py index 447c81f..ecdd6e1 100644 --- a/deep_translator/constants.py +++ b/deep_translator/constants.py @@ -29,6 +29,8 @@ "LIBRE_FREE": "https://libretranslate.de/", "TENENT": "https://tmt.tencentcloudapi.com", "BAIDU": "https://fanyi-api.baidu.com/api/trans/vip/translate", + "GLOSBE": "https://translator-api.glosbe.com/translateByLangDetect", + "WIKIMEDIA_MINT": "https://translate.wmcloud.org/api/translate" } GOOGLE_LANGUAGES_TO_CODES = { @@ -664,3 +666,409 @@ "vietnamese": "vie", "yueyu": "yue", } + +GLOSBE_LANGUAGE_TO_CODE = { + "achinese": "ace", + "afrikaans": "af", + "akan": "ak", + "amharic": "am", + "arabic": "ar", + "armenian": "hy", + "assamese": "as", + "asturian": "ast", + "awadhi": "awa", + "ayacucho quechua": "quy", + "azerbaijani": "az", + "balinese": "ban", + "bambara": "bm", + "bangla": "bn", + "banjar": "bjn", + "bashkir": "ba", + "basque": "eu", + "belarusian": "be", + "bemba": "bmy", + "bhojpuri": "bho", + "bosnian": "bs", + "buginese": "bug", + "bulgarian": "bg", + "burmese": "my", + "cantonese": "yue", + "catalan": "ca", + "cebuano": "ceb", + "central atlas tamazight": "tzm", + "central aymara": "ayr", + "central kanuri": "knc", + "chhattisgarhi": "hne", + "chinese": "zh", + 
"chokwe": "cjk", + "crimean tatar": "crh", + "croatian": "hr", + "czech": "cs", + "danish": "da", + "dari": "prs", + "dutch": "nl", + "dzongkha": "dz", + "eastern yiddish": "ydd", + "egyptian arabic": "arz", + "english": "en", + "esperanto": "eo", + "estonian": "et", + "ewe": "ee", + "faroese": "fo", + "fijian": "fj", + "finnish": "fi", + "fon": "fon", + "french": "fr", + "friulian": "fur", + "galician": "gl", + "ganda": "lg", + "georgian": "ka", + "german": "de", + "greek": "el", + "guarani": "gn", + "gujarati": "gu", + "haitian": "ht", + "hausa": "ha", + "hebrew": "he", + "hindi": "hi", + "hungarian": "hu", + "icelandic": "is", + "igbo": "ig", + "iloko": "ilo", + "indonesian": "id", + "irish": "ga", + "italian": "it", + "japanese": "ja", + "javanese": "jv", + "jula": "dyu", + "kabiyè": "kbp", + "kabuverdianu": "kea", + "kabyle": "kab", + "kachin": "kac", + "kamba": "xba", + "kannada": "kn", + "kashmiri": "ks", + "kazakh": "kk", + "khmer": "km", + "kikuyu": "ki", + "kimbundu": "kmb", + "kinyarwanda": "rw", + "kongo": "kg", + "korean": "ko", + "kurdish kurmanji": "ku", + "kyrgyz": "ky", + "lao": "lo", + "latgalian": "ltg", + "latvian": "lv", + "ligurian": "lij", + "limburgish": "li", + "lingala": "ln", + "lithuanian": "lt", + "lombard": "lmo", + "luba-lulua": "lua", + "luo": "luo", + "lushai": "lus", + "luxembourgish": "lb", + "macedonian": "mk", + "magahi": "mag", + "maithili": "mai", + "malay": "zlm", + "malayalam": "ml", + "maltese": "mt", + "manipuri": "mni", + "maori": "mi", + "marathi": "mr", + "mesopotamian arabic": "acm", + "minangkabau": "min", + "mongolian": "mn", + "mòoré": "mos", + "moroccan arabic": "ary", + "najdi arabic": "ars", + "nepali": "npi", + "nigerian fulfulde": "fuv", + "north levantine arabic": "apc", + "northern sotho": "nso", + "norwegian": "no", + "norwegian nynorsk": "nn", + "nuer": "nus", + "nyanja": "ny", + "occitan": "oc", + "odia": "ory", + "pangasinan": "pag", + "panjabi": "pa", + "papiamento": "pap", + "persian": "fa", + "plateau 
malagasy": "plt", + "polish": "pl", + "portuguese": "pt", + "romanian": "ro", + "rundi": "rn", + "russian": "ru", + "samoan": "sm", + "sango": "sg", + "sanskrit": "sa", + "santali": "sat", + "sardinian": "sc", + "scottish gaelic": "gd", + "serbian": "sr", + "shan": "shn", + "shona": "sn", + "sicilian": "scn", + "silesian": "szl", + "sindhi": "sd", + "sinhala": "si", + "slovak": "sk", + "slovenian": "sl", + "somali": "so", + "sorani kurdish": "ckb", + "south azerbaijani": "azb", + "south levantine arabic": "ajp", + "southern pashto": "pbt", + "southern sotho": "st", + "southwestern dinka": "dik", + "spanish": "es", + "sundanese": "su", + "swahili": "sw", + "swati": "ss", + "swedish": "sv", + "ta'izzi-adeni arabic": "acq", + "tagalog": "tl", + "tajik": "tg", + "tamasheq": "taq", + "tamil": "ta", + "tatar": "tt", + "telugu": "te", + "thai": "th", + "tibetan": "bo", + "tigrinya": "ti", + "tok pisin": "tpi", + "tosk albanian": "als", + "tsonga": "ts", + "tswana": "tn", + "tumbuka": "tum", + "tunisian arabic": "aeb", + "turkish": "tr", + "turkmen": "tk", + "twi": "tw", + "uighur": "ug", + "ukrainian": "uk", + "umbundu": "umb", + "urdu": "ur", + "uzbek": "uz", + "venetian": "vec", + "vietnamese": "vi", + "waray (philippines)": "war", + "welsh": "cy", + "west central oromo": "gaz", + "wolof": "wo", + "xhosa": "xh", + "yoruba": "yo", + "zulu": "zu" +} + +WIKIMEDIA_MINT_LANGUAGE_TO_CODE = { + "achinese": "ace", + "iraqi arabic": "acm", + "acq": "acq", + "tunisian arabic": "aeb", + "afrikaans": "af", + "ajp": "ajp", + "akan": "ak", + "amharic": "am", + "levantine arabic": "apc", + "arabic": "ar", + "najdi arabic": "ars", + "moroccan arabic": "ary", + "egyptian arabic": "arz", + "assamese": "as", + "asturian": "ast", + "awadhi": "awa", + "aymara": "ay", + "azerbaijani": "az", + "south azerbaijani": "azb", + "bashkir": "ba", + "balinese": "ban", + "central bikol": "bcl", + "belarusian": "be", + "bemba": "bem", + "bulgarian": "bg", + "bhojpuri": "bh", + "banjar": "bjn", + 
"bambara": "bm", + "bangla": "bn", + "tibetan": "bo", + "bodo": "brx", + "bosnian": "bs", + "buginese": "bug", + "catalan": "ca", + "cebuano": "ceb", + "cherokee": "chr", + "cjk": "cjk", + "central kurdish": "ckb", + "crimean tatar": "crh", + "czech": "cs", + "welsh": "cy", + "danish": "da", + "german": "de", + "dinka": "din", + "dogri": "doi", + "dyula": "dyu", + "dzongkha": "dz", + "ewe": "ee", + "greek": "el", + "english": "en", + "esperanto": "eo", + "spanish": "es", + "estonian": "et", + "basque": "eu", + "persian": "fa", + "fula": "ff", + "finnish": "fi", + "fijian": "fj", + "faroese": "fo", + "fon": "fon", + "french": "fr", + "friulian": "fur", + "irish": "ga", + "gan chinese": "gan", + "scottish gaelic": "gd", + "galician": "gl", + "guarani": "gn", + "goan konkani": "gom", + "gujarati": "gu", + "gun": "guw", + "hausa": "ha", + "hebrew": "he", + "hindi": "hi", + "hne": "hne", + "croatian": "hr", + "haitian creole": "ht", + "hungarian": "hu", + "armenian": "hy", + "indonesian": "id", + "igbo": "ig", + "iloko": "ilo", + "icelandic": "is", + "italian": "it", + "japanese": "ja", + "javanese": "jv", + "georgian": "ka", + "kabyle": "kab", + "kachin": "kac", + "kamba": "kam", + "kabiye": "kbp", + "kabuverdianu": "kea", + "kongo": "kg", + "kikuyu": "ki", + "kazakh": "kk", + "khmer": "km", + "kimbundu": "kmb", + "kannada": "kn", + "knc": "knc", + "korean": "ko", + "kanuri": "kr", + "kashmiri": "ks", + "kurdish": "ku", + "kyrgyz": "ky", + "luxembourgish": "lb", + "ganda": "lg", + "limburgish": "li", + "ligurian": "lij", + "lombard": "lmo", + "lingala": "ln", + "lao": "lo", + "lithuanian": "lt", + "latgalian": "ltg", + "luba-lulua": "lua", + "luo": "luo", + "mizo": "lus", + "latvian": "lv", + "magahi": "mag", + "maithili": "mai", + "malagasy": "mg", + "māori": "mi", + "minangkabau": "min", + "macedonian": "mk", + "malayalam": "ml", + "mongolian": "mn", + "manipuri": "mni", + "mossi": "mos", + "marathi": "mr", + "malay": "ms", + "maltese": "mt", + "burmese": "my", + 
"nepali": "ne", + "dutch": "nl", + "norwegian nynorsk": "nn", + "norwegian": "no", + "northern sotho": "nso", + "nuer": "nus", + "nyanja": "ny", + "occitan": "oc", + "oromo": "om", + "odia": "or", + "punjabi": "pa", + "pangasinan": "pag", + "papiamento": "pap", + "polish": "pl", + "pashto": "ps", + "portuguese": "pt", + "quechua": "qu", + "rundi": "rn", + "romanian": "ro", + "russian": "ru", + "kinyarwanda": "rw", + "sanskrit": "sa", + "santali": "sat", + "sardinian": "sc", + "sicilian": "scn", + "sindhi": "sd", + "sango": "sg", + "shan": "shn", + "sinhala": "si", + "slovak": "sk", + "slovenian": "sl", + "samoan": "sm", + "shona": "sn", + "somali": "so", + "albanian": "sq", + "serbian": "sr", + "swati": "ss", + "southern sotho": "st", + "sundanese": "su", + "swedish": "sv", + "swahili": "sw", + "silesian": "szl", + "tamil": "ta", + "taq": "taq", + "telugu": "te", + "tajik": "tg", + "thai": "th", + "tigrinya": "ti", + "turkmen": "tk", + "tagalog": "tl", + "tswana": "tn", + "tongan": "to", + "tok pisin": "tpi", + "turkish": "tr", + "tsonga": "ts", + "tatar": "tt", + "tumbuka": "tum", + "twi": "tw", + "tahitian": "ty", + "central atlas tamazight": "tzm", + "uyghur": "ug", + "ukrainian": "uk", + "umbundu": "umb", + "urdu": "ur", + "uzbek": "uz", + "venetian": "vec", + "vietnamese": "vi", + "waray": "war", + "wolof": "wo", + "xhosa": "xh", + "yiddish": "yi", + "yoruba": "yo", + "chinese": "zh", + "zulu": "zu" +} \ No newline at end of file diff --git a/deep_translator/exceptions.py b/deep_translator/exceptions.py index 47314fb..52fc5f1 100644 --- a/deep_translator/exceptions.py +++ b/deep_translator/exceptions.py @@ -147,6 +147,7 @@ class ServerException(Exception): 403: "ERR_DAILY_REQ_LIMIT_EXCEEDED", 404: "ERR_DAILY_CHAR_LIMIT_EXCEEDED", 413: "ERR_TEXT_TOO_LONG", + 415: "UNSUPPORTED_MEDIA_TYPE", 429: "ERR_TOO_MANY_REQUESTS", 422: "ERR_UNPROCESSABLE_TEXT", 500: "ERR_INTERNAL_SERVER_ERROR", diff --git a/deep_translator/glosbe.py b/deep_translator/glosbe.py new file mode 
# ---------------------------------------------------------------------------
# deep_translator/glosbe.py  (new file introduced by this patch)
# ---------------------------------------------------------------------------
"""
Glosbe translator API
"""

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from typing import List, Optional

import requests

from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, GLOSBE_LANGUAGE_TO_CODE
from deep_translator.exceptions import (
    RequestError,
    TooManyRequests,
    ServerException,
    TranslationNotFound
)
from deep_translator.validate import is_empty, is_input_valid, request_failed


class GlosbeTranslator(BaseTranslator):
    """
    class that wraps functions, which use Glosbe Translate under the hood to translate text(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "pl",
        proxies: Optional[dict] = None,
        **kwargs
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping handed to ``requests`` on every call
        @param kwargs: forwarded unchanged to ``BaseTranslator.__init__``
        """
        self.proxies = proxies
        super().__init__(
            base_url=BASE_URLS.get("GLOSBE"),
            source=source,
            target=target,
            languages=GLOSBE_LANGUAGE_TO_CODE,
            **kwargs
        )

    def translate(self, text: str, **kwargs) -> str:
        """
        function to translate a text
        @param text: desired text to translate
        @param kwargs: optional ``timeout`` (seconds) forwarded to ``requests.post``;
            when omitted the request has no timeout, exactly as before
        @return: str: translated text
        @raise ServerException: the connection could not be established, or HTTP 415
        @raise TooManyRequests: HTTP 429
        @raise RequestError: any other status code rejected by ``request_failed``
        @raise TranslationNotFound: the response is empty or carries no "translation" entry
        """
        if not is_input_valid(text):
            # NOTE(review): the validator presumably raises on bad input; this
            # keeps the original fall-through (nothing returned) if it does not.
            return None

        text = text.strip()
        if self._same_source_target() or is_empty(text):
            return text

        self._url_params["targetLang"] = self._target
        self._url_params["sourceLang"] = self._source

        headers = {"Content-Type": "text/plain"}

        try:
            response = requests.post(
                self._base_url,
                params=self._url_params,
                # A str body is encoded as ISO-8859-1 by http.client and blows up
                # on any character outside Latin-1; send explicit UTF-8 bytes.
                data=text.encode("utf-8"),
                headers=headers,
                proxies=self.proxies,
                timeout=kwargs.get("timeout"),
            )
        except (requests.exceptions.ConnectionError, ConnectionError) as exc:
            # requests raises its own ConnectionError, which does not derive from
            # the builtin one the original code caught.
            # NOTE(review): every other raise in this patch passes a status code
            # to ServerException; 503 is used for "service unreachable" - confirm
            # it is mapped (or falls back to a generic message) in exceptions.py.
            raise ServerException(503) from exc

        if response.status_code == 415:
            raise ServerException(response.status_code)
        if response.status_code == 429:
            raise TooManyRequests()
        if request_failed(status_code=response.status_code):
            raise RequestError()

        res = response.json()
        if not res:
            raise TranslationNotFound(text)

        try:
            return res["translation"]
        except (KeyError, TypeError) as exc:
            # Payload without the expected key (or not a mapping at all): report
            # it through the package's own exception instead of a bare KeyError.
            raise TranslationNotFound(text) from exc

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)


# ---------------------------------------------------------------------------
# deep_translator/mint.py  (new file introduced by this patch)
# Wikimedia MinT translator API
# ---------------------------------------------------------------------------

import json
from typing import Union

from deep_translator.constants import WIKIMEDIA_MINT_LANGUAGE_TO_CODE


class WikimediaMinTMachineTranslator(BaseTranslator):
    """
    class that wraps functions, which use Wikimedia Translate under the hood to translate text(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "es",
        proxies: Optional[dict] = None,
        **kwargs
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping handed to ``requests`` on every call
        @param kwargs: forwarded unchanged to ``BaseTranslator.__init__``
        """
        self.proxies = proxies
        super().__init__(
            base_url=BASE_URLS.get("WIKIMEDIA_MINT"),
            source=source,
            target=target,
            languages=WIKIMEDIA_MINT_LANGUAGE_TO_CODE,
            **kwargs
        )

    def translate(self, text: str, **kwargs) -> Union[str, dict]:
        """
        function to translate a text
        @param text: desired text to translate
        @param kwargs: ``detail`` - when truthy the whole decoded JSON response is
            returned instead of only its "translation" entry; ``timeout`` -
            optional seconds forwarded to ``requests.post`` (no timeout when omitted)
        @return: translated text, or the full response mapping when ``detail`` is set
        @raise ServerException: the connection could not be established, or HTTP 400/502
        @raise TooManyRequests: HTTP 429
        @raise RequestError: any other status code rejected by ``request_failed``
        @raise TranslationNotFound: the response is empty or carries no "translation" entry
        """
        if not is_input_valid(text):
            # NOTE(review): the validator presumably raises on bad input; this
            # keeps the original fall-through (nothing returned) if it does not.
            return None

        text = text.strip()
        detail = kwargs.get("detail")
        if self._same_source_target() or is_empty(text):
            return text

        # Build the endpoint in a local instead of appending to self._base_url:
        # the instance is no longer mutated, so repeated or batch calls cannot
        # accumulate "/<source>/<target>" segments.
        url = "/".join(
            (BASE_URLS.get("WIKIMEDIA_MINT"), self._source, self._target)
        )

        headers = {"Content-Type": "application/json"}
        payload = json.dumps({"text": text})

        try:
            response = requests.post(
                url,
                data=payload,
                headers=headers,
                proxies=self.proxies,
                timeout=kwargs.get("timeout"),
            )
        except (requests.exceptions.ConnectionError, ConnectionError) as exc:
            # requests raises its own ConnectionError, which does not derive from
            # the builtin one the original code caught.
            # NOTE(review): every other raise in this patch passes a status code
            # to ServerException; 503 is used for "service unreachable" - confirm
            # it is mapped (or falls back to a generic message) in exceptions.py.
            raise ServerException(503) from exc

        if response.status_code in {502, 400}:
            raise ServerException(response.status_code)
        if response.status_code == 429:
            raise TooManyRequests()
        if request_failed(status_code=response.status_code):
            raise RequestError()

        res = response.json()
        if not res:
            raise TranslationNotFound(text)

        if detail:
            return res

        try:
            return res["translation"]
        except (KeyError, TypeError) as exc:
            # Payload without the expected key (or not a mapping at all): report
            # it through the package's own exception instead of a bare KeyError.
            raise TranslationNotFound(text) from exc

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
translator `_ +* Support for the `Glosbe translator `_ +* Support for the `Wikimedia MinT Machine translator `_ * Support for ChatGpt * Support for proxy usage * Automatic single language detection @@ -221,6 +223,8 @@ Imports PapagoTranslator, DeeplTranslator, QcriTranslator, + GlosbeTranslator, + WikimediaMinTMachineTranslator, single_detection, batch_detection) @@ -756,6 +760,71 @@ BaiduTranslator translated = BaiduTranslator(appid="your-appid", appkey="your-appkey" source="en", target="zh").translate_file('path/to/file') +Glosbe Translator +-------------------- + +.. note:: + + With GlosbeTranslator, you can pass any supported language, by name or by code, as the source language. + More than 190 languages are supported by the GlosbeTranslator. + +- Simple translation + +.. code-block:: python + + text = 'Keep it up. You are awesome' + + translated = GlosbeTranslator(source='english', target='french').translate(text) + +- Translate batch of texts + +.. code-block:: python + + texts = ["hallo welt", "guten morgen"] + + # the translate_sentences function is deprecated, use the translate_batch function instead + translated = GlosbeTranslator('de', 'en').translate_batch(texts) + +- Translate text from txt/docx/pdf: + +.. code-block:: python + + path = "your_file.txt" + + translated = GlosbeTranslator(source='en', target='fr').translate_file(path) + +Wikimedia MinT Machine Translator +--------------------------------- + +.. note:: + + With WikimediaMinTMachineTranslator, you can pass any supported language, by name or by code, as the source language. + More than 200 languages are supported by the WikimediaMinTMachineTranslator. + +- Simple translation + +.. code-block:: python + + text = 'Keep it up. You are awesome' + + translated = WikimediaMinTMachineTranslator(source='english', target='french').translate(text) + +- Translate batch of texts + +..
code-block:: python + + texts = ["hallo welt", "guten morgen"] + + # the translate_sentences function is deprecated, use the translate_batch function instead + translated = WikimediaMinTMachineTranslator('de', 'en').translate_batch(texts) + +- Translate text from txt/docx/pdf: + +.. code-block:: python + + path = "your_file.txt" + + translated = WikimediaMinTMachineTranslator(source='en', target='fr').translate_file(path) Proxy usage ------------- diff --git a/examples/glosbe.py b/examples/glosbe.py new file mode 100644 index 0000000..1f3048a --- /dev/null +++ b/examples/glosbe.py @@ -0,0 +1,5 @@ +from deep_translator import GlosbeTranslator + +res = GlosbeTranslator(source="en",target="zh").translate("What is Lorem Ipsum? Lorem Ipsum is simply dummy text of the printing and typesetting industry.") + +print(res) \ No newline at end of file diff --git a/examples/mint.py b/examples/mint.py new file mode 100644 index 0000000..7d4b1e7 --- /dev/null +++ b/examples/mint.py @@ -0,0 +1,10 @@ +from deep_translator import WikimediaMinTMachineTranslator + +res = WikimediaMinTMachineTranslator(source="en",target="ace").translate("What is Lorem Ipsum? Lorem Ipsum is simply dummy text of the printing and typesetting industry.") + +print(res) + +# if you want detailed response then set detail=True +res = WikimediaMinTMachineTranslator(source="en",target="ace").translate("What is Lorem Ipsum?
Lorem Ipsum is simply dummy text of the printing and typesetting industry.",detail=True) + +print(res) diff --git a/tests/test_glosbe.py b/tests/test_glosbe.py new file mode 100644 index 0000000..390d48b --- /dev/null +++ b/tests/test_glosbe.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +"""Tests for `deep_translator` package.""" + +import pytest + +from deep_translator import GlosbeTranslator, exceptions +from deep_translator.constants import GLOSBE_LANGUAGE_TO_CODE + + +@pytest.fixture +def glosbe(): + return GlosbeTranslator(source="en", target="fr") + + +def test_inputs(): + with pytest.raises(exceptions.InvalidSourceOrTargetLanguage): + GlosbeTranslator(source="", target="") + + with pytest.raises(exceptions.InvalidSourceOrTargetLanguage): + GlosbeTranslator(source="auto", target="") + + with pytest.raises(exceptions.InvalidSourceOrTargetLanguage): + GlosbeTranslator(source="", target="en") + + +def test_abbreviations_and_languages_mapping(): + for abb, lang in GLOSBE_LANGUAGE_TO_CODE.items(): + l1 = GlosbeTranslator(source=abb) + l2 = GlosbeTranslator(source=lang) + assert l1._source == l2._source + + +def test_payload(glosbe): + with pytest.raises(exceptions.NotValidPayload): + glosbe.translate({}) + + with pytest.raises(exceptions.NotValidPayload): + glosbe.translate([]) diff --git a/tests/test_mint.py b/tests/test_mint.py new file mode 100644 index 0000000..4593eee --- /dev/null +++ b/tests/test_mint.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +"""Tests for `deep_translator` package.""" + +import pytest + +from deep_translator import WikimediaMinTMachineTranslator, exceptions +from deep_translator.constants import WIKIMEDIA_MINT_LANGUAGE_TO_CODE + + +@pytest.fixture +def mint(): + return WikimediaMinTMachineTranslator(source="en", target="fr") + + +def test_inputs(): + with pytest.raises(exceptions.InvalidSourceOrTargetLanguage): + WikimediaMinTMachineTranslator(source="", target="") + + with pytest.raises(exceptions.InvalidSourceOrTargetLanguage): + 
WikimediaMinTMachineTranslator(source="auto", target="") + + with pytest.raises(exceptions.InvalidSourceOrTargetLanguage): + WikimediaMinTMachineTranslator(source="", target="en") + + +def test_abbreviations_and_languages_mapping(): + for abb, lang in WIKIMEDIA_MINT_LANGUAGE_TO_CODE.items(): + l1 = WikimediaMinTMachineTranslator(source=abb) + l2 = WikimediaMinTMachineTranslator(source=lang) + assert l1._source == l2._source + + +def test_payload(mint): + with pytest.raises(exceptions.NotValidPayload): + mint.translate({}) + + with pytest.raises(exceptions.NotValidPayload): + mint.translate([]) +