From ae19b6df0f4767073d77fabd93cf5665cb6d38dd Mon Sep 17 00:00:00 2001 From: Raman Antanevich Date: Tue, 20 Jul 2021 00:55:50 +0300 Subject: [PATCH] BeautifulSoup is replaced with regexp --- .gitignore | 138 +++++++++++++++++++++++++++++++++++++++++ reverso_api/context.py | 25 ++------ 2 files changed, 142 insertions(+), 21 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a81c8ee --- /dev/null +++ b/.gitignore @@ -0,0 +1,138 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ diff --git a/reverso_api/context.py b/reverso_api/context.py index 6709e15..085d5be 100644 --- a/reverso_api/context.py +++ b/reverso_api/context.py @@ -1,5 +1,6 @@ """Reverso Context (context.reverso.net) API for Python""" +import re import json from collections import namedtuple from typing import Generator @@ -50,7 +51,6 @@ def __init__(self, self.__data_ismodified = True self.__total_pages = None - # FIXME: make self.supported_langs read-only self.supported_langs = self.__get_supported_langs() self.source_text, self.target_text = source_text, target_text @@ -71,26 +71,9 @@ def __eq__(self, other) -> bool: @staticmethod def __get_supported_langs() -> dict: - supported_langs = {} - response = requests.get("https://context.reverso.net/translation/", headers=HEADERS) - - soup = BeautifulSoup(response.content, features="lxml") - - src_selector = soup.find("div", id="src-selector") - trg_selector = soup.find("div", id="trg-selector") - - for selector, attribute in ((src_selector, "source_lang"), - (trg_selector, "target_lang")): - dd_spans = selector.find(class_="drop-down").find_all("span") - langs = [span.get("data-value") for span in dd_spans] - langs = [lang for lang in langs - if isinstance(lang, str) and len(lang) == 2] - - supported_langs[attribute] = tuple(langs) - - return supported_langs + return frozenset(re.findall('data-value="([a-z]{2})"', response.text)) @property def source_text(self) -> str: @@ -226,7 +209,7 @@ def target_text(self, value) -> None: def source_lang(self, value) -> None: value = str(value) - if value not in self.supported_langs["source_lang"]: + if value not in self.supported_langs: raise ValueError(f"{value!r} source language is not supported") if value == self.source_lang: @@ -239,7 +222,7 @@ def source_lang(self, value) -> None: def target_lang(self, value) -> None: value = str(value) - if value not in self.supported_langs["target_lang"]: + if value not in self.supported_langs: raise ValueError(f"{value!r} target language is not supported") if value == self.source_lang: