From faf4b27d1f210b9573691044d28b5cc99db5f40e Mon Sep 17 00:00:00 2001 From: ZhymabekRoman Date: Thu, 27 May 2021 17:42:36 +0600 Subject: [PATCH] Refactor Google Translate & DeepL Translate & Improved CLI mode & New tests --- setup.py | 5 + tests/test_translator.py | 67 --- tests/test_translators.py | 48 ++ tests/translators/test_reverso.py | 16 - tests/translators/test_yandex.py | 24 - translatepy/__main__.py | 39 +- translatepy/_google_translate_domains.json | 1 - translatepy/translate.py | 56 +-- translatepy/translators/__init__.py | 15 +- translatepy/translators/base.py | 162 +++---- translatepy/translators/bing.py | 43 +- translatepy/translators/deepl.py | 299 ++++++------- translatepy/translators/deepl_old.py | 162 ------- translatepy/translators/google.py | 493 ++++++++++----------- translatepy/translators/reverso.py | 13 +- translatepy/translators/yandex.py | 11 +- translatepy/utils/request.py | 2 +- 17 files changed, 598 insertions(+), 858 deletions(-) delete mode 100644 tests/test_translator.py create mode 100644 tests/test_translators.py delete mode 100644 tests/translators/test_reverso.py delete mode 100644 tests/translators/test_yandex.py delete mode 100644 translatepy/_google_translate_domains.json delete mode 100644 translatepy/translators/deepl_old.py diff --git a/setup.py b/setup.py index 5d85879b..e8a6f793 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,11 @@ long_description_content_type = "text/markdown", include_package_data=True, python_requires='>=3.2, <4', + entry_points={ + 'console_scripts': [ + 'translatepy-cli = translatepy.__main__:main' + ] + }, package_data={ 'translatepy': ['LICENSE'], }, diff --git a/tests/test_translator.py b/tests/test_translator.py deleted file mode 100644 index cd451d53..00000000 --- a/tests/test_translator.py +++ /dev/null @@ -1,67 +0,0 @@ -from translatepy import Translator - -import pytest - -t = Translator() - - -def test_translate(): - print("[test] --> Testing Translator.translate") - assert t.translate("Hello", "Japanese", "fr") is not None - assert t.translate("Hello", "French", "Japanese") is not None - assert t.translate("Hello", "ja", "fr") is not None - - -def test_empty_string(): - with pytest.raises(ValueError): - t.translate("", "Japanese", "French") - t.language(" ", "jp", "French") - t.transliterate("\n \n ", "en") - t.spellcheck(" \n") - - -def test_transliterate(): - print("[test] --> Testing Translator.transliterate") - assert t.transliterate("In the Japanese city of Nagasaki, an American officer Pinkerton marries a young Japanese girl Chio-chio-san.") is not None - - -def test_language(): - print("[test] --> Testing Translator.language") - assert t.language("Hello") is not None - assert t.language("Привет") is not None - - -def test_spellcheck(): - print("[test] --> Testing Translator.spellcheck") - assert t.spellcheck("Helo") is not None - assert t.spellcheck("God morning") is not None - - -"""" -def test_example(): - print("[test] --> Testing Translator.example") - assert t.example("Hello", "japanese") is not None - - -def test_dictionary(): - print("[test] --> Testing Translator.dictionary") - # assert t.dictionary("Hello", "Japanese") is not None - # I can't test dictionary as DeepL is very strict on their rate-limit - -def test_text_to_speech(): - print("[test] --> Testing Translator.text_to_speech") - assert t.text_to_speech("Hello, how are you?", "English") is not None - -def test_imports(): - print("[test] --> Testing imports") - from translatepy.translators import bing - from translatepy.translators import deepl - from translatepy.translators import google - from translatepy.translators import reverso - from translatepy.translators import unselected - from translatepy.translators import yandex - from translatepy.utils import annotations - from translatepy.utils import similarity - from translatepy import translate - import translatepy -""" diff --git a/tests/test_translators.py b/tests/test_translators.py new file mode 100644 index 00000000..d1333936 --- /dev/null +++ b/tests/test_translators.py @@ -0,0 +1,48 @@ +from translatepy.translators.google import GoogleTranslateV1, GoogleTranslateV2 +from translatepy.translators.bing import BingTranslate +from translatepy.translators.yandex import YandexTranslate +from translatepy.translators.reverso import ReversoTranslate +from translatepy.translators.deepl import DeeplTranslate +from translatepy import Translator + + +class TestAllTranslators: + def setup(self): + self.services_list = [ + Translator(), + GoogleTranslateV1(), + GoogleTranslateV2(), + BingTranslate(), + ReversoTranslate(), + YandexTranslate(), + DeeplTranslate(), + ] + + def test_service_translate(self): + translation_args_list = [["What cool weather today!", "fr"], + ["Hello", "Japanese", "en"], + ["Hello, how are you?", "ja"]] + + for service in self.services_list: + for args in translation_args_list: + result = service.translate(*args) + assert result + + def test_service_transliterate(self): + transliteration_args_list = [["What cool weather today!", "ar"], + ["Hello", "Japanese", "en"], + ["Hello, how are you?", "ja"]] + + for service in self.services_list: + for args in transliteration_args_list: + result = service.transliterate(*args) + assert result + + def test_service_spellcheck(self): + spellcheck_args_list = [["What cool weater todai!"], ["Helo"], + ["Helo, how are tou?"]] + + for service in self.services_list: + for args in spellcheck_args_list: + result = service.spellcheck(*args) + assert result diff --git a/tests/translators/test_reverso.py b/tests/translators/test_reverso.py deleted file mode 100644 index c66a3662..00000000 --- a/tests/translators/test_reverso.py +++ /dev/null @@ -1,16 +0,0 @@ -from translatepy.translators.reverso import ReversoTranslate - -reverso_translate = ReversoTranslate() - - -def test_reverso_translate(): - assert reverso_translate.translate("What cool weather today!", "fr") is not None - assert reverso_translate.translate("Отличная работа, спасибо", "English") is not None - - -def test_reverso_transliterate(): - assert reverso_translate.transliterate("Nice", "ja") is not None - - -def test_reverso_spellcheck(): - assert reverso_translate.spellcheck("Good hiliday") is not None diff --git a/tests/translators/test_yandex.py b/tests/translators/test_yandex.py deleted file mode 100644 index e4db2199..00000000 --- a/tests/translators/test_yandex.py +++ /dev/null @@ -1,24 +0,0 @@ -from translatepy.translators.yandex import YandexTranslate - -yandex_translate = YandexTranslate() - - -def test_yandex_translate(): - print("[test] --> Testing YandexTranslate.translate") - assert yandex_translate.translate("Hello, how are you?", "ja") is not None - - -def test_yandex_spellcheck(): - print("[test] --> Testing YandexTranslate.spellcheck") - assert yandex_translate.spellcheck("Hllo, world") is not None - assert yandex_translate.spellcheck("Good, ivening", "english") is not None - - -def test_yandex_transliterate(): - print("[test] --> YandexTranslate.transliterate") - assert yandex_translate.transliterate("Привет, как дела? Давно не виделись") - - -def test_yandex_ucid(): - print("[test] --> Testing Yandex Translate UCID value generating") - assert yandex_translate._ucid() is not None diff --git a/translatepy/__main__.py b/translatepy/__main__.py index 0aca1a68..2217625c 100644 --- a/translatepy/__main__.py +++ b/translatepy/__main__.py @@ -1,19 +1,34 @@ import argparse from translatepy import Translator -dl = Translator() -# Create the parser -my_parser = argparse.ArgumentParser(prog='translatepy-cli', description='Translate, transliterate, get the language of texts in no time with the help of multiple APIs!') +def main(): + dl = Translator() -subparser = my_parser.add_subparsers(help='Actions', dest="action", required=True) -parser_translate = subparser.add_parser('translate', help='Translates the given text to the given language') -parser_translate.add_argument('--text', '-t', action='store', type=str, required=True, help='text to translate') -parser_translate.add_argument('--dest-lang', '-d', action='store', type=str, required=True, help='destination language') -parser_translate.add_argument('--source-lang', '-s', action='store', default='auto', type=str, help='source language') + # Create the parser + my_parser = argparse.ArgumentParser(prog='translatepy-cli', description='Translate, transliterate, get the language of texts in no time with the help of multiple APIs!') -args = my_parser.parse_args() + subparser = my_parser.add_subparsers(help='Actions', dest="action", required=True) + parser_translate = subparser.add_parser('translate', help='Translates the given text to the given language') + parser_translate.add_argument('--text', '-t', action='store', type=str, required=True, help='text to translate') + parser_translate.add_argument('--dest-lang', '-d', action='store', type=str, required=True, help='destination language') + parser_translate.add_argument('--source-lang', '-s', action='store', default='auto', type=str, help='source language') -if args.action == 'translate': - result = dl.translate(args.text, args.dest_lang, args.source_lang) - print(result) + parser_translate = subparser.add_parser('shell', help='Translates the given text in interactive shell mode') + + args = my_parser.parse_args() + + if args.action == 'translate': + result = dl.translate(args.text, args.dest_lang, args.source_lang) + print(result) + + if args.action == 'shell': + while True: + input_text = input(">>> ") + + result = dl.translate(input_text, "en") + print(result) + + +if __name__ == "__main__": + main() diff --git a/translatepy/_google_translate_domains.json b/translatepy/_google_translate_domains.json deleted file mode 100644 index 77fbb789..00000000 --- a/translatepy/_google_translate_domains.json +++ /dev/null @@ -1 +0,0 @@ -["translate.google.ac","translate.google.ad","translate.google.ae","translate.google.al","translate.google.am","translate.google.as","translate.google.at","translate.google.az","translate.google.ba","translate.google.be","translate.google.bf","translate.google.bg","translate.google.bi","translate.google.bj","translate.google.bs","translate.google.bt","translate.google.by","translate.google.ca","translate.google.cat","translate.google.cc","translate.google.cd","translate.google.cf","translate.google.cg","translate.google.ch","translate.google.ci","translate.google.cl","translate.google.cm","translate.google.cn","translate.google.co.ao","translate.google.co.bw","translate.google.co.ck","translate.google.co.cr","translate.google.co.id","translate.google.co.il","translate.google.co.in","translate.google.co.jp","translate.google.co.ke","translate.google.co.kr","translate.google.co.ls","translate.google.co.ma","translate.google.co.mz","translate.google.co.nz","translate.google.co.th","translate.google.co.tz","translate.google.co.ug","translate.google.co.uk","translate.google.co.uz","translate.google.co.ve","translate.google.co.vi","translate.google.co.za","translate.google.co.zm","translate.google.co.zw","translate.google.co","translate.google.com.af","translate.google.com.ag","translate.google.com.ai","translate.google.com.ar","translate.google.com.au","translate.google.com.bd","translate.google.com.bh","translate.google.com.bn","translate.google.com.bo","translate.google.com.br","translate.google.com.bz","translate.google.com.co","translate.google.com.cu","translate.google.com.cy","translate.google.com.do","translate.google.com.ec","translate.google.com.eg","translate.google.com.et","translate.google.com.fj","translate.google.com.gh","translate.google.com.gi","translate.google.com.gt","translate.google.com.hk","translate.google.com.jm","translate.google.com.kh","translate.google.com.kw","translate.google.com.lb","translate.google.com.lc","translate.google.com.ly","translate.google.com.mm","translate.google.com.mt","translate.google.com.mx","translate.google.com.my","translate.google.com.na","translate.google.com.ng","translate.google.com.ni","translate.google.com.np","translate.google.com.om","translate.google.com.pa","translate.google.com.pe","translate.google.com.pg","translate.google.com.ph","translate.google.com.pk","translate.google.com.pr","translate.google.com.py","translate.google.com.qa","translate.google.com.sa","translate.google.com.sb","translate.google.com.sg","translate.google.com.sl","translate.google.com.sv","translate.google.com.tj","translate.google.com.tr","translate.google.com.tw","translate.google.com.ua","translate.google.com.uy","translate.google.com.vc","translate.google.com.vn","translate.google.com","translate.google.cv","translate.google.cx","translate.google.cz","translate.google.de","translate.google.dj","translate.google.dk","translate.google.dm","translate.google.dz","translate.google.ee","translate.google.es","translate.google.eu","translate.google.fi","translate.google.fm","translate.google.fr","translate.google.ga","translate.google.ge","translate.google.gf","translate.google.gg","translate.google.gl","translate.google.gm","translate.google.gp","translate.google.gr","translate.google.gy","translate.google.hn","translate.google.hr","translate.google.ht","translate.google.hu","translate.google.ie","translate.google.im","translate.google.io","translate.google.iq","translate.google.is","translate.google.it","translate.google.je","translate.google.jo","translate.google.kg","translate.google.ki","translate.google.kz","translate.google.la","translate.google.li","translate.google.lk","translate.google.lt","translate.google.lu","translate.google.lv","translate.google.md","translate.google.me","translate.google.mg","translate.google.mk","translate.google.ml","translate.google.mn","translate.google.ms","translate.google.mu","translate.google.mv","translate.google.mw","translate.google.ne","translate.google.nf","translate.google.nl","translate.google.no","translate.google.nr","translate.google.nu","translate.google.pl","translate.google.pn","translate.google.ps","translate.google.pt","translate.google.ro","translate.google.rs","translate.google.ru","translate.google.rw","translate.google.sc","translate.google.se","translate.google.sh","translate.google.si","translate.google.sk","translate.google.sm","translate.google.sn","translate.google.so","translate.google.sr","translate.google.st","translate.google.td","translate.google.tg","translate.google.tk","translate.google.tl","translate.google.tm","translate.google.tn","translate.google.to","translate.google.tt","translate.google.us","translate.google.vg","translate.google.vu","translate.google.ws"] \ No newline at end of file diff --git a/translatepy/translate.py b/translatepy/translate.py index 55ccd187..f0cdb9e1 100644 --- a/translatepy/translate.py +++ b/translatepy/translate.py @@ -4,15 +4,15 @@ © Anime no Sekai — 2021 """ -import inspect +# import inspect -# from translatepy.translators import GoogleTranslate +from translatepy.translators.google import GoogleTranslateV1, GoogleTranslateV2 from translatepy.translators.bing import BingTranslate from translatepy.translators.yandex import YandexTranslate from translatepy.translators.reverso import ReversoTranslate -# from translatepy.translators import DeepL +from translatepy.translators.deepl import DeeplTranslate from translatepy.translators.base import BaseTranslator - +from translatepy.models import TranslationResult, TransliterationResult, SpellcheckResult, LanguageResult from translatepy.utils.annotations import List @@ -22,12 +22,12 @@ class Translator(): """ def __init__(self, services_list: List[BaseTranslator] = [ - # GoogleTranslator, - # GoogleV2Translator, - BingTranslate, - # DeepLTranslator, - ReversoTranslate, - YandexTranslate, + YandexTranslate(), + GoogleTranslateV1(), + GoogleTranslateV2(), + BingTranslate(), + ReversoTranslate(), + DeeplTranslate(), ]) -> None: if not isinstance(services_list, List): @@ -36,15 +36,19 @@ def __init__(self, services_list: List[BaseTranslator] = [ if not services_list: raise ValueError("Parameter 'services_list' must not be empty") - for service in services_list: - if not inspect.isclass(service): - raise ValueError("Type of the parameter 'services_list' must be a class") - if not issubclass(service, BaseTranslator): - raise TypeError("Type of the parameter 'services_list' must be a child class of BaseTranslator class") + # TODO: validation is broken + # for service in services_list: + # if not inspect.isclass(service): + # ... + # Or: + # if not isinstance(service, type): + # raise ValueError("Type of the parameter 'services_list' must be a class") + # if not issubclass(service, BaseTranslator): + # raise TypeError("Type of the parameter 'services_list' must be a child class of BaseTranslator class") self.services = services_list - def translate(self, text, destination_language, source_language="auto"): + def translate(self, text: str, destination_language: str, source_language: str = "auto") -> TranslationResult: """ Translates the given text to the given language @@ -53,16 +57,16 @@ def translate(self, text, destination_language, source_language="auto"): for service in self.services: try: - result = service().translate( + result = service.translate( text, destination_language, source_language) - except ImportError: # Exception: + except Exception: continue else: return result else: print("ERROR!") - def transliterate(self, text, source_language="auto") -> str: + def transliterate(self, text: str, destination_language: str = "en", source_language: str = "auto") -> TransliterationResult: """ Transliterates the given text @@ -70,7 +74,7 @@ def transliterate(self, text, source_language="auto") -> str: """ for service in self.services: try: - result = service().transliterate(text, source_language) + result = service.transliterate(text, destination_language, source_language) except Exception: continue else: @@ -78,7 +82,7 @@ def transliterate(self, text, source_language="auto") -> str: else: print("NOOOOOOOOOOOO!") - def spellcheck(self, text, source_language="auto") -> str: + def spellcheck(self, text: str, source_language: str = "auto") -> SpellcheckResult: """ Checks the spelling of a given text @@ -87,7 +91,7 @@ def spellcheck(self, text, source_language="auto") -> str: for service in self.services: try: - result = service().spellcheck(text, source_language) + result = service.spellcheck(text, source_language) except Exception: continue else: @@ -95,7 +99,7 @@ def spellcheck(self, text, source_language="auto") -> str: else: print("NOOOOOO!") - def language(self, text: str) -> str: + def language(self, text: str) -> LanguageResult: """ Returns the language of the given text @@ -104,7 +108,7 @@ def language(self, text: str) -> str: for service in self.services: try: - response = service().language(text) + response = service.language(text) except Exception: continue else: @@ -112,7 +116,7 @@ def language(self, text: str) -> str: else: print("NOOOOOOOOOOOOO!") - def example(self, text, destination_language=None, source_language="auto") -> str: + def example(self, text: str, destination_language: str, source_language: str = "auto") -> str: """ Returns a set of examples / use cases for the given word @@ -121,7 +125,7 @@ def example(self, text, destination_language=None, source_language="auto") -> st result = self.bing_translate.example(text, destination_language, source_language) - def dictionary(self, text, destination_language, source_language="auto") -> str: + def dictionary(self, text: str, destination_language: str, source_language="auto") -> str: """ Returns a list of translations that are classified between two categories: featured and less common diff --git a/translatepy/translators/__init__.py b/translatepy/translators/__init__.py index c54e4d00..1b023bab 100644 --- a/translatepy/translators/__init__.py +++ b/translatepy/translators/__init__.py @@ -1,10 +1,5 @@ -# TODO - -""" -from .bing import BingTranslator -from .deepl import DeepLTranslator -from .google import GoogleTranslator, GoogleV2Translator -from .reverso import ReversoTranslator -from .translator import Translator -from .yandex import YandexTranslator -""" +from translatepy.translators.google import GoogleTranslateV1, GoogleTranslateV2 +from translatepy.translators.bing import BingTranslate +from translatepy.translators.yandex import YandexTranslate +from translatepy.translators.reverso import ReversoTranslate +from translatepy.translators.deepl import DeeplTranslate diff --git a/translatepy/translators/base.py b/translatepy/translators/base.py index 43b629ef..f32eb1f0 100644 --- a/translatepy/translators/base.py +++ b/translatepy/translators/base.py @@ -1,10 +1,9 @@ -from abc import ABC, abstractmethod, abstractproperty +from abc import ABC, abstractmethod from icecream import ic from translatepy.language import Language from translatepy.models import TranslationResult, TransliterationResult, SpellcheckResult, LanguageResult -from translatepy.exceptions import TranslationError from translatepy.utils.lru_cacher import LRUDictCache @@ -48,31 +47,27 @@ def translate(self, text: str, destination_language: str, source_language: str = dest_code = self._detect_and_validate_lang(destination_language) source_code = self._detect_and_validate_lang(source_language) - try: - # Build cache key - _cache_key = str({"t": text, "d": dest_code, "s": source_code}) + # Build cache key + _cache_key = str({"t": text, "d": dest_code, "s": source_code}) - if _cache_key in self._translations_cache: - # Taking the values from the cache - translation = self._translations_cache[_cache_key] - else: - # Call the private concrete implementation of the Translator to get the translation - translation = self._translate(text, dest_code, source_code) - - # Сache the translation values to speed up the translation process in the future - self._translations_cache[_cache_key] = translation + if _cache_key in self._translations_cache: + # Taking the values from the cache + translation = self._translations_cache[_cache_key] + else: + # Call the private concrete implementation of the Translator to get the translation + translation = self._translate(text, dest_code, source_code) - # Return a `TranslationResult` object - return TranslationResult( - service=str(self), - source=text, - source_language=source_code, - destination_language=dest_code, - result=translation, - ) + # Сache the translation values to speed up the translation process in the future + self._translations_cache[_cache_key] = translation - except Exception as exc: - raise TranslationError from exc + # Return a `TranslationResult` object + return TranslationResult( + service=str(self), + source=text, + source_language=source_code, + destination_language=dest_code, + result=translation, + ) @abstractmethod def _translate(self, text: str, destination_language: str, source_language: str) -> str: @@ -109,31 +104,27 @@ def transliterate(self, text: str, destination_language: str = "en", source_lang dest_code = self._detect_and_validate_lang(destination_language) source_code = self._detect_and_validate_lang(source_language) - try: - # Build cache key - _cache_key = str({"t": text, "d": dest_code, "s": source_code}) + # Build cache key + _cache_key = str({"t": text, "d": dest_code, "s": source_code}) - if _cache_key in self._transliterations_cache: - # Taking the values from the cache - transliteration = self._transliterations_cache[_cache_key] - else: - # Call the private concrete implementation of the Translator to get the transliteration - transliteration = self._transliterate(text, dest_code, source_code) - - # Сache the transliteration values to speed up the translation process in the future - self._transliterations_cache[_cache_key] = transliteration + if _cache_key in self._transliterations_cache: + # Taking the values from the cache + transliteration = self._transliterations_cache[_cache_key] + else: + # Call the private concrete implementation of the Translator to get the transliteration + transliteration = self._transliterate(text, dest_code, source_code) - # Return a `TransliterationResult` object - return TransliterationResult( - service=str(self), - source=text, - source_language=source_code, - destination_language=dest_code, - result=transliteration, - ) + # Сache the transliteration values to speed up the translation process in the future + self._transliterations_cache[_cache_key] = transliteration - except Exception as exc: - raise TranslationError from exc + # Return a `TransliterationResult` object + return TransliterationResult( + service=str(self), + source=text, + source_language=source_code, + destination_language=dest_code, + result=transliteration, + ) @abstractmethod def _transliterate(self, text: str, source_language: str) -> str: @@ -167,30 +158,27 @@ def spellcheck(self, text: str, source_language: str = "auto") -> SpellcheckResu # this makes the code transformation transparent to the user. source_code = self._detect_and_validate_lang(source_language) - try: - # Build cache key - _cache_key = str({"t": text, "s": source_code}) + # Build cache key + _cache_key = str({"t": text, "s": source_code}) - if _cache_key in self._spellchecks_cache: - # Taking the values from the cache - spellcheck = self._spellchecks_cache[_cache_key] - else: - # Call the private concrete implementation of the Translator to get the spellchecked text - spellcheck = self._spellcheck(text, source_code) + if _cache_key in self._spellchecks_cache: + # Taking the values from the cache + spellcheck = self._spellchecks_cache[_cache_key] + else: + # Call the private concrete implementation of the Translator to get the spellchecked text + spellcheck = self._spellcheck(text, source_code) - # Сache the spellcheck values to speed up the translation process in the future - self._spellchecks_cache[_cache_key] = spellcheck + # Сache the spellcheck values to speed up the translation process in the future + self._spellchecks_cache[_cache_key] = spellcheck - # Return a `SpellcheckResult` object - return SpellcheckResult( - service=str(self), - source=text, - source_language=source_code, - result=spellcheck, - ) + # Return a `SpellcheckResult` object + return SpellcheckResult( + service=str(self), + source=text, + source_language=source_code, + result=spellcheck, + ) - except Exception as exc: - raise TranslationError from exc @abstractmethod def _spellcheck(self, text: str, source_language: str) -> str: @@ -215,29 +203,25 @@ def language(self, text: str) -> LanguageResult: # Validate the text self._validate_text(text) - try: - # Build cache key - _cache_key = str({"t": text}) - - if _cache_key in self._languages_cache: - # Taking the values from the cache - language = self._languages_cache[_cache_key] - else: - # Call the private concrete implementation of the Translator to get the language - language = self._language(text) + # Build cache key + _cache_key = str({"t": text}) - # Сache the languages values to speed up the translation process in the future - self._languages_cache[_cache_key] = language + if _cache_key in self._languages_cache: + # Taking the values from the cache + language = self._languages_cache[_cache_key] + else: + # Call the private concrete implementation of the Translator to get the language + language = self._language(text) - # Return a `LanguageResult` object - return LanguageResult( - service=str(self), - source=text, - result=language, - ) + # Сache the languages values to speed up the translation process in the future + self._languages_cache[_cache_key] = language - except Exception as exc: - raise TranslationError from exc + # Return a `LanguageResult` object + return LanguageResult( + service=str(self), + source=text, + result=language, + ) @abstractmethod def _language(self, text: str) -> str: @@ -254,7 +238,7 @@ def _example(self, text: str) -> str: """ @abstractmethod - def _dictionary(self, text: str, destination_language: str, source_language: str): + def _dictionary(self, text: str, destination_language: str, source_language: str) -> str: """ pass """ @@ -296,8 +280,8 @@ def clean_cache(self) -> None: Returns: None """ - self._translattions_cache.clear() - self._translatterations_cache.clear() + self._translations_cache.clear() + self._transliterations_cache.clear() self._spellchecks_cache.clear() self._languages_cache.clear() self._examples_cache.clear() diff --git a/translatepy/translators/bing.py b/translatepy/translators/bing.py index a7d08b6f..ed4e5272 100644 --- a/translatepy/translators/bing.py +++ b/translatepy/translators/bing.py @@ -5,23 +5,24 @@ import pyuseragents from translatepy.translators.base import BaseTranslator from translatepy.exceptions import UnsupportedMethod -from translatepy.utils.annotations import Tuple, List +from translatepy.utils.annotations import Tuple from icecream import ic HEADERS = { - "Host": "www.bing.com", + # "Host": "www.bing.com", "User-Agent": pyuseragents.random(), "Accept": "*/*", "Accept-Language": "en-US,en;q=0.5", "Accept-Encoding": "gzip, deflate", - "Referer": "https://www.bing.com/", + # "Referer": "https://www.bing.com/", "Content-Type": "application/x-www-form-urlencoded", "Connection": "keep-alive" } -PARAMS = {'IG': '839D27F8277F4AA3B0EDB83C255D0D70', 'IID': 'translator.5033.3'} +PARAMS = {'IID': 'translator.5033.3'} +# TODO: read documentation: https://docs.microsoft.com/ru-ru/azure/cognitive-services/translator/language-support class Example(): @@ -74,10 +75,12 @@ def _translate(self, text: str, destination_language: str, source_language: str) Returns: Tuple(str, str) --> tuple with source_lang, translation - None, None --> when an error occurs """ - request = requests.post("https://www.bing.com/ttranslatev3", headers=HEADERS, params=PARAMS, data={'text': str(text), 'fromLang': str(source_language), 'to': str(destination_language)}) + ic(text) + ic(destination_language) + ic(source_language) + request = requests.post("https://www.bing.com/ttranslatev3", headers=HEADERS, params=PARAMS, data={'text': text, 'fromLang': source_language, 'to': destination_language}) response = request.json() ic(response) if request.status_code < 400: @@ -94,7 +97,6 @@ def _example(self, text, destination_language, source_language, translation) -> Returns: Tuple(str, list[str]) --> tuple with source_lang, [examples] - None, None --> when an error occurs """ if translation is None: @@ -103,7 +105,7 @@ def _example(self, text, destination_language, source_language, translation) -> if source_language == "auto-detect": source_language = self._language(text) - request = requests.post("https://www.bing.com/texamplev3", headers=HEADERS, params=PARAMS, data={'text': str(text).lower(), 'from': str(source_language), 'to': str(destination_language), 'translation': str(translation).lower()}) + request = requests.post("https://www.bing.com/texamplev3", headers=HEADERS, params=PARAMS, data={'text': text.lower(), 'from': source_language, 'to': destination_language, 'translation': translation.lower()}) response = requests.json() if request.status_code < 400: return [Example(example) for example in response[0]["examples"]] @@ -118,11 +120,10 @@ def _spellcheck(self, text: str, source_language: str) -> str: Returns: Tuple(str, str) --> tuple with source_lang, spellchecked_text - None, None --> when an error occurs """ if source_language == "auto-detect": source_language = self._language(text) - request = requests.post("https://www.bing.com/tspellcheckv3", headers=HEADERS, params=PARAMS, data={'text': str(text), 'fromLang': str(source_language)}) + request = requests.post("https://www.bing.com/tspellcheckv3", headers=HEADERS, params=PARAMS, data={'text': text, 'fromLang': source_language}) response = request.json() ic(response) if request.status_code < 400: @@ -140,17 +141,27 @@ def _language(self, text: str) -> str: Returns: str --> the language code - None --> when an error occurs """ - request = requests.post("https://www.bing.com/ttranslatev3", headers=HEADERS, params=PARAMS, data={'text': str(text), 'fromLang': "auto-detect", 'to': "en"}) + request = requests.post("https://www.bing.com/ttranslatev3", headers=HEADERS, params=PARAMS, data={'text': text, 'fromLang': "auto-detect", 'to': "en"}) response = request.json() + ic(response) if request.status_code < 400: return response[0]["detectedLanguage"]["language"] - def _transliterate(self, text: str): - # TODO: Implement - raise UnsupportedMethod("Bing Translate doesn't support this method") + def _transliterate(self, text: str, destination_language: str, source_language: str): + # TODO: alternative implementation, won't work + # request = requests.post("https://www.bing.com/ttransliteratev3", headers=HEADERS, params=PARAMS, data={'text': text, 'language': source_language, 'toScript': destination_language}) + + request = requests.post("https://www.bing.com/ttranslatev3", headers=HEADERS, params=PARAMS, data={'text': text, 'fromLang': source_language, 'to': destination_language}) + response = request.json() + ic(response) + if request.status_code < 400: + # XXX: Not a predictable response from Bing Translate + try: + return response[1]["inputTransliteration"] + except IndexError: + return response[0]["translations"][0]["transliteration"]["text"] def _dictionary(self, text: str, destination_language: str, source_language: str): # TODO: Implement @@ -161,6 +172,8 @@ def _text_to_speech(self, text: str, source_language: str): raise UnsupportedMethod("Bing Translate doesn't support this method") def _language_normalize(self, language): + # TODO + _normalized_language_code = language.alpha2 if _normalized_language_code == "auto": diff --git a/translatepy/translators/deepl.py b/translatepy/translators/deepl.py index 1bc584a1..6852c0f9 100644 --- a/translatepy/translators/deepl.py +++ b/translatepy/translators/deepl.py @@ -9,36 +9,40 @@ © Anime no Sekai — 2021 """ -from time import time +from time import time, sleep from re import compile -from typing import Union from random import randint -from json import loads, dumps - import requests -import pyuseragents from bs4 import BeautifulSoup +import pyuseragents -from translatepy import Language -from translatepy.utils.annotations import Tuple, Dict, List +from translatepy.language import Language +from translatepy.exceptions import UnsupportedMethod +from translatepy.translators.base import BaseTranslator +from translatepy.utils.annotations import Tuple, List SENTENCES_SPLITTING_REGEX = compile('(?<=[.!:?]) +') from icecream import ic -class getClientState(): +HEADER = { + "Accept": "*/*", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + "Host": "www2.deepl.com", + "Origin": "https://www.deepl.com", + "Referer": "https://www.deepl.com/", + "Content-Type": "application/json", + "User-Agent": pyuseragents.random() +} + + +class GetClientState(): + """ + DeepL Translate state manager + """ def __init__(self): - self.id_number = randint(100, 9999) * 10000 - self.user_agent = pyuseragents.random() - self.headers = { - "Accept": "*/*", - "Accept-Encoding": "gzip, deflate, br", - "Host": "www.deepl.com", - "Connection": "keep-alive", - "Referer": "https://www.deepl.com/translator", - "Content-Type": "application/json", - "User-Agent": self.user_agent - } + self.id_number = randint(100, 9999) * 1000 def dump(self): self.id_number += 1 @@ -57,46 +61,20 @@ def get(self): Returns a new Client State ID """ - """ - NO SUPPORT FOR PROXIES FOR NOW - - proxies = None - _proxies = {} - if proxies: - try: - _proxies["http"] = proxies["http"] - _proxies["https"] = proxies["https"] - except Exception: - proxies = str(proxies) - _proxies["http"] = proxies - _proxies["https"] = proxies - resp = loads(requests.post("https://www.deepl.com/PHP/backend/clientState.php?request_type=jsonrpc&il=EN", data=self.dump(), headers=self.headers, proxies=_proxies).content) - """ - - resp = loads(requests.post("https://www.deepl.com/PHP/backend/clientState.php?request_type=jsonrpc&il=EN", data=dumps(self.dump()).encode("utf-8"), headers=self.headers).text) - return resp.get("id", None) - + request = requests.post("https://www.deepl.com/PHP/backend/clientState.php", params={"request_type": "jsonrpc", "il": "EN", "method": "getClientState"}, json=self.dump(), headers=HEADER) + response = request.json() + ic(response) + return response["id"] class JSONRPCRequest(): """ - JSONRPC Request Sender for DeepL + JSON RPC Request Sender for DeepL """ def __init__(self) -> None: - self.client_state = getClientState() - self.headers = { - "Accept": "*/*", - "Accept-Encoding": "gzip, deflate, br", - "Connection": "keep-alive", - "Host": "www2.deepl.com", - "Origin": "https://www.deepl.com", - "Referer": "https://www.deepl.com/", - "Content-Type": "application/json", - "User-Agent": self.client_state.user_agent - } + self.client_state = GetClientState() self.id_number = self.client_state.get() - if not self.id_number: - self.id_number = 1 + self.last_access = time() + 3 def dump(self, method, params): self.id_number += 1 @@ -109,25 +87,36 @@ def dump(self, method, params): return data def send_jsonrpc(self, method, params): - resp = loads(requests.post("https://www2.deepl.com/jsonrpc", data=dumps(self.dump(method, params)).encode("utf-8"), headers=self.headers).text) - ic(resp) - return resp.get("result", None) + # Take a break 3 sec between requests, so as not to get a block by the IP address + if self.last_access < self.last_access + 3: + sleep((self.last_access + 3) - self.last_access) + + request = requests.post("https://www2.deepl.com/jsonrpc", json=self.dump(method, params), headers=HEADER) + response = request.json() + ic(response) + self.last_access = time() + return response["result"] -class DeepL(): - def __init__(self, preferred_langs=None) -> None: +class DeeplTranslate(BaseTranslator): + def __init__(self, preferred_langs=["EN", "RU"]) -> None: self.jsonrpc = JSONRPCRequest() - self.user_preferred_langs = ([] if preferred_langs is None else preferred_langs) + self.user_preferred_langs = preferred_langs - def split_into_sentences(self, text, destination_language, source_language) -> Union[Tuple[List[str], str], Tuple[List[str], None]]: + def _split_into_sentences(self, text: str, destination_language: str, source_language: str) -> Tuple[List[str], str]: """ Split a string into sentences using the DeepL API.\n Fallbacks to a simple Regex splitting if an error occurs or no result is found Returned tuple: (Result, Computed Language (None if same as source_language)) """ + REGEX_SPLIT = True + + if REGEX_SPLIT is True: + SENTENCES_SPLITTING_REGEX.split(text), None + params = { - "texts": [text.strip()], + "texts": [text.strip()], # What for need strip there? "lang": { "lang_user_selected": source_language, "user_preferred_langs": list(set(self.user_preferred_langs + [destination_language])) @@ -135,39 +124,17 @@ def split_into_sentences(self, text, destination_language, source_language) -> U } resp = self.jsonrpc.send_jsonrpc("LMT_split_into_sentences", params) - if resp is not None: - if source_language != resp["lang"]: - return resp["splitted_texts"][0], resp["lang"] + return resp["splitted_texts"][0], resp["lang"] - return resp["splitted_texts"][0], None + def _translate(self, text: str, destination_language: str, source_language: str) -> str: + priority = 1 + quality = "" - return SENTENCES_SPLITTING_REGEX.split(text), None - - def translate(self, text, destination_language, source_language, priority=1, quality="", compute_splitting=False) -> str: - """ - Translates the given text to the given language - - Args: - text: A string corresponding to the given text - destination_language: The destination language - source_language: Default value = None - priority: The DeepL API priority, Default value = 1 - quality: The DeepL API quality, Default value = "" (excludes the quality parameter from the request) - compute_splitting: Wether translatepy should ask to split the sentences to the DeepL API or it should split it using Regex, Default value = False - - Returns: - Tuple(str, str) --> tuple with source_lang, translation - - """ # splitting the text into sentences - if compute_splitting: - sentences, computed_lang = self.split_into_sentences(text, destination_language, source_language) - else: - sentences = SENTENCES_SPLITTING_REGEX.split(text) - computed_lang = None + sentences, computed_lang = self._split_into_sentences(text, destination_language, source_language) # building the a job per sentence - jobs = _build_jobs(sentences, quality) + jobs = self._build_jobs(sentences, quality) # timestamp generation i_count = 1 @@ -186,7 +153,7 @@ def translate(self, text, destination_language, source_language, priority=1, qua "timestamp": ts + (i_count - ts % i_count) } - if computed_lang is not None: + if source_language == "auto": params["lang"]["source_lang_computed"] = computed_lang params["lang"]["user_preferred_langs"].append(computed_lang) else: @@ -198,57 +165,57 @@ def translate(self, text, destination_language, source_language, priority=1, qua translations = results["translations"] return " ".join(obj["beams"][0]["postprocessed_sentence"] for obj in translations if obj["beams"]) - def language(self, text, priority=1, quality="", compute_splitting=False) -> str: - # splitting the text into sentences - if compute_splitting: - sentences, computed_lang = self.split_into_sentences(text, "AUTO", "EN") - else: - sentences = SENTENCES_SPLITTING_REGEX.split(text) - computed_lang = None - - # building the a job per sentence - jobs = _build_jobs(sentences, quality) - - # timestamp generation - i_count = 1 - for sentence in sentences: - i_count += sentence.count("i") - ts = int(time() * 10) * 100 + 1000 - - # params building - params = { - "jobs": jobs, - "lang": { - "target_lang": "EN", - "user_preferred_langs": ["EN"] - }, - "priority": priority, - "timestamp": ts + (i_count - ts % i_count) - } + def _language(self, text: str) -> str: + priority = 1 + quality = "" - if computed_lang is not None: - params["lang"]["source_lang_computed"] = computed_lang - params["lang"]["user_preferred_langs"].append(computed_lang) - else: - params["lang"]["source_lang_user_selected"] = "AUTO" + # splitting the text into sentences + sentences, computed_lang = self._split_into_sentences(text, "EN", "AUTO") + + # building the a job per sentence + jobs = self._build_jobs(sentences, quality) + + # timestamp generation + i_count = 1 + for sentence in sentences: + i_count += sentence.count("i") + ts = int(time() * 10) * 100 + 1000 + + # params building + params = { + "jobs": jobs, + "lang": { + "target_lang": "EN", + "user_preferred_langs": ["EN"] + }, + "priority": priority, + "timestamp": ts + (i_count - ts % i_count) + } - results = self.jsonrpc.send_jsonrpc("LMT_handle_jobs", params) + if computed_lang is not None: + params["lang"]["source_lang_computed"] = computed_lang + params["lang"]["user_preferred_langs"].append(computed_lang) + else: + params["lang"]["source_lang_user_selected"] = "AUTO" - if results is not None: - return results["source_lang"] + results = self.jsonrpc.send_jsonrpc("LMT_handle_jobs", params) + if results is not None: + return results["source_lang"] - def _dictionary(self, text, destination_language, source_language) -> str: + def _dictionary(self, text: str, destination_language: str, source_language: str) -> str: if source_language == "AUTO": source_language = self._language(text) - source_language = Language(source_language).english + + destination_language = Language(destination_language).english + source_language = Language(source_language).english request = requests.post("https://dict.deepl.com/" + source_language + "-" + destination_language + "/search?ajax=1&source=" + source_language + "&onlyDictEntries=1&translator=dnsof7h3k2lgh3gda&delay=800&jsStatus=0&kind=full&eventkind=keyup&forleftside=true", data={"query": text}) if request.status_code < 400: response = BeautifulSoup(request.text, "html.parser") results = {} - results["_html"] = request.text - results["_response"] = response + # results["_html"] = request.text # What for? + # results["_response"] = response # What for? results["featured"] = [] results["less_common"] = [] for element in response.find_all("a"): @@ -260,37 +227,51 @@ def _dictionary(self, text, destination_language, source_language) -> str: results["less_common"].append(str(element.text).replace("\n", "")) return results - -def _build_jobs(sentences, quality=""): - """ - Builds a job for each sentence for DeepL - """ - jobs = [] - for index, sentence in enumerate(sentences): - if index == 0: - try: - before = [] - after = [sentences[index + 1]] - except IndexError: # index == len(sentences) - 1 - before = [] - after = [] - else: - if len(before) > 4: - before.pop(0) # the "before" array cannot be more than 5 elements long i guess? - before.extend([sentences[index - 1]]) - if index > len(sentences) - 2: - after = [] + def _build_jobs(self, sentences, quality=""): + """ + Builds a job for each sentence for DeepL + """ + jobs = [] + for index, sentence in enumerate(sentences): + if index == 0: + try: + before = [] + after = [sentences[index + 1]] + except IndexError: # index == len(sentences) - 1 + before = [] + after = [] else: - after = [sentences[index + 1]] + if len(before) > 4: + before.pop(0) # the "before" array cannot be more than 5 elements long i guess? + before.extend([sentences[index - 1]]) + if index > len(sentences) - 2: + after = [] + else: + after = [sentences[index + 1]] + + job = { + "kind": "default", + "raw_en_context_after": after.copy(), + "raw_en_context_before": before.copy(), + "raw_en_sentence": sentence, + } + if quality != "": + job["quality"] = quality + jobs.append(job) - job = { - "kind": "default", - "raw_en_context_after": after.copy(), - "raw_en_context_before": before.copy(), - "raw_en_sentence": sentence, - } - if quality != "": - job["quality"] = quality - jobs.append(job) + return jobs + + def _transliterate(self, text, source_language): + raise UnsupportedMethod("") + + def _example(self, text): + raise UnsupportedMethod("") + + def _spellcheck(self, text, source_language): + raise UnsupportedMethod("") + + def _language_normalize(self, language): + return language.alpha2.upper() - return jobs + def __repr__(self): + return "DeepL Tranlsate" diff --git a/translatepy/translators/deepl_old.py b/translatepy/translators/deepl_old.py deleted file mode 100644 index 33094383..00000000 --- a/translatepy/translators/deepl_old.py +++ /dev/null @@ -1,162 +0,0 @@ -from typing import Union -from translatepy.models.languages import Language -from requests import post -from json import loads -from time import time -from traceback import print_exc -from bs4 import BeautifulSoup -from warnings import warn -from translatepy.utils.annotations import Tuple, List, Dict - -class NotImplementedYet(Warning): - """ """ - def __init__(self, *args: object) -> None: - super().__init__(*args) - - -# not in use for now -FROM = ['auto', 'zh', 'nl', 'en', 'fr', 'de', 'it', 'ja', 'pl', 'pt', 'ru', 'es'] -TO = ['en-US', 'en-GB', 'zh-ZH', 'nl-NL', 'fr-FR', 'de-DE', 'it-IT', 'ja-JA', 'pl-PL', 'pt-PT', 'pt-BR', 'ru-RU', 'es-ES'] -FORMALITY_SUPPORT = ['nl-NL', 'fr-FR', 'de-DE', 'it-IT', 'pl-PL', 'pt-PT', 'pt-BR', 'ru-RU', 'es-ES'] -FORMALITY = [ - ["Formal tone", "formal"], - ["Informal tone", "informal"], - ["Automatic", "auto"] -] - -class DeepL(): - """A Python implementation of DeepL APIs""" - def __init__(self) -> None: - pass - - def translate(self, text, destination_language, source_language="auto", formality=None) -> Union[Tuple[str, str], Tuple[None, None]]: - """ - Translates the given text to the given language - - Args: - text: param destination_language: - source_language: Default value = "auto" - formality: Default value = None - destination_language: - - Returns: - Tuple(str, str) --> tuple with source_lang, translation - None, None --> when an error occurs - - """ - try: - if isinstance(destination_language, Language): - destination_language = destination_language.deepl - if isinstance(source_language, Language): - source_language = source_language.deepl - - if formality is not None: - warning_message = "[translatepy] Warning: formality has not been implemented yet and won't have any effect to the translation" - warn(NotImplementedYet(warning_message)) - if source_language is None: - source_language = "auto" - payload = {"jsonrpc":"2.0","method": "LMT_handle_jobs","params":{"jobs":[{"kind":"default","raw_en_sentence":str(text),"raw_en_context_before":[],"raw_en_context_after":[],"preferred_num_beams":4,"quality":"fast"}],"lang":{"user_preferred_langs":["JA","FR","EN"],"source_lang_user_selected":str(source_language),"target_lang":str(destination_language)},"priority":-1,"commonJobParams":{},"timestamp":int(time())},"id":63710028} - request = post("https://www2.deepl.com/jsonrpc", json=payload, cookies={}) - if request.status_code < 400: - data = loads(request.text) - return data["result"]["source_lang"], data["result"]["translations"][0]["beams"][0]["postprocessed_sentence"] - else: - print(request.text) - return None, None - except Exception: - print_exc() - return None, None - - def dictionary(self, text, destination_language, source_language=None) -> Union[Tuple[str, Dict], Tuple[None, None]]: - """ - Gives out a list of translations - - > destination_language and source_language both need to be the full english name or a Language object - - Args: - text: param destination_language: - source_language: Default value = None) - destination_language: - - Returns: - Tuple(str, Dict({ - featured: featured translations, - less_common: less common translations, - _html: the raw HTML response, - _response: the BeautifulSoup object for the given HTML - })) --> tuple with source_lang, results - None, None --> when an error occurs - - """ - try: - if isinstance(destination_language, Language): - dl = destination_language.english - if dl is None: - dl = destination_language.name - destination_language = dl - if isinstance(source_language, Language): - sl = source_language.english - if sl is None: - sl = source_language.name - source_language = sl - - if source_language is None or source_language == "auto": - source_language = self.language(text) - if source_language is None: - return None, None - try: - source_language = Language(source_language).english - except Exception: - return None, None - - request = post("https://dict.deepl.com/" + str(source_language) + "-" + str(destination_language) + "/search?ajax=1&source=" + str(source_language) + "&onlyDictEntries=1&translator=dnsof7h3k2lgh3gda&delay=800&jsStatus=0&kind=full&eventkind=keyup&forleftside=true", data={"query": str(text)}) - if request.status_code < 400: - response = BeautifulSoup(request.text, "html.parser") - results = {} - results["_html"] = request.text - results["_response"] = response - results["featured"] = [] - results["less_common"] = [] - for element in response.find_all("a"): - if element.has_attr('class'): - if "dictLink" in element["class"]: - if "featured" in element["class"]: - results["featured"].append(str(element.text).replace("\n", "")) - else: - results["less_common"].append(str(element.text).replace("\n", "")) - return source_language, results - else: - print(request.text) - return None, None - except Exception: - print_exc() - return None, None - - def language(self, text) -> Union[str, None]: - """ - Gives out the language of the given text - - Args: - text: - - Returns: - str --> the language code - None --> when an error occurs - - """ - try: - payload = {"jsonrpc":"2.0","method": "LMT_handle_jobs","params":{"jobs":[{"kind":"default","raw_en_sentence":str(text),"raw_en_context_before":[],"raw_en_context_after":[],"preferred_num_beams":4,"quality":"fast"}],"lang":{"user_preferred_langs":["JA","FR","EN"],"source_lang_user_selected":"auto","target_lang":"JA","priority":-1,"commonJobParams":{},"timestamp":int(time())},"id":63710028}} - request = post("https://www2.deepl.com/jsonrpc", json=payload, cookies={}) - if request.status_code < 400: - data = loads(request.text) - return data["result"]["source_lang"] - else: - print(request.text) - return None - except Exception: - print_exc() - return None - - - def __repr__(self) -> str: - return "DeepL Translate" diff --git a/translatepy/translators/google.py b/translatepy/translators/google.py index f0e45440..da52308e 100644 --- a/translatepy/translators/google.py +++ b/translatepy/translators/google.py @@ -7,326 +7,301 @@ Heavily inspired by ssut/googletrans and https://kovatch.medium.com/deciphering-google-batchexecute-74991e4e446c """ -from typing import Union from requests import get, post from json import loads, dumps -from urllib.parse import quote -from traceback import print_exc +# from icecream import ic import pyuseragents +from translatepy.translators.base import BaseTranslator +from translatepy.exceptions import UnsupportedMethod from translatepy.utils.gtoken import TokenAcquirer -from translatepy.utils.annotations import Tuple from translatepy.utils.utils import convert_to_float -HEADERS = { - 'User-Agent': pyuseragents.random() -} +HEADERS = {'User-Agent': pyuseragents.random()} -def _request(text, destination, source): +class GoogleTranslateV1(BaseTranslator): """ - Makes a translation request to Google Translate RPC API - - Most of the code comes from https://github.com/ssut/py-googletrans/pull/255 - """ - rpc_request = dumps([[ - [ - 'MkEWBc', - dumps([[str(text), str(source), str(destination), True],[None]], separators=(',', ':')), - None, - 'generic', - ], - ]], separators=(',', ':')) - data = { - "f.req": rpc_request - } - params = { - 'rpcids': "MkEWBc", - 'bl': 'boq_translate-webserver_20201207.13_p0', - 'soc-app': 1, - 'soc-platform': 1, - 'soc-device': 1, - 'rt': 'c', - } - request = post('https://translate.google.com/_/TranslateWebserverUi/data/batchexecute', params=params, data=data) - if request.status_code < 400: - return request.text - return None - -def _parse_response(data): + A Python implementation of Google Translate's RPC API """ - Parses the broken JSON response given by the new RPC API endpoint (batchexecute) + def _request(self, text, destination, source): + """ + Makes a translation request to Google Translate RPC API - Most of the code comes from https://github.com/ssut/py-googletrans/pull/255 - """ - token_found = False - resp = "" - opening_bracket = 0 - closing_bracket = 0 - # broken json parsing - for line in data.split('\n'): - token_found = token_found or '"MkEWBc"' in line[:30] - if not token_found: - continue - - is_in_string = False - for index, char in enumerate(line): - if char == '\"' and line[max(0, index - 1)] != '\\': - is_in_string = not is_in_string - if not is_in_string: - if char == '[': - opening_bracket += 1 - elif char == ']': - closing_bracket += 1 - - resp += line - if opening_bracket == closing_bracket: - break - - return loads(loads(resp)[0][2]) - - -def _new_translate(text, destination_language, source_language): - """ - Translates the given text to the destination language with the new batchexecute API + Most of the code comes from https://github.com/ssut/py-googletrans/pull/255 + """ + rpc_request = dumps([[ + [ + 'MkEWBc', + dumps([[text, source, destination, True], [None]], separators=(',', ':')), + None, + 'generic', + ], + ]], separators=(',', ':')) + data = { + "f.req": rpc_request + } + params = { + 'rpcids': "MkEWBc", + 'bl': 'boq_translate-webserver_20201207.13_p0', + 'soc-app': 1, + 'soc-platform': 1, + 'soc-device': 1, + 'rt': 'c', + } + request = post('https://translate.google.com/_/TranslateWebserverUi/data/batchexecute', params=params, data=data) + if request.status_code < 400: + return request.text + + def _parse_response(self, data): + """ + Parses the broken JSON response given by the new RPC API endpoint (batchexecute) - Heavily inspired by ssut/googletrans and https://kovatch.medium.com/deciphering-google-batchexecute-74991e4e446c - """ - try: - request = _request(text, destination_language, source_language) - parsed = _parse_response(request) + Most of the code comes from https://github.com/ssut/py-googletrans/pull/255 + """ + token_found = False + resp = "" + opening_bracket = 0 + closing_bracket = 0 + # broken json parsing + for line in data.split('\n'): + token_found = token_found or '"MkEWBc"' in line[:30] + if not token_found: + continue + + is_in_string = False + for index, char in enumerate(line): + if char == '\"' and line[max(0, index - 1)] != '\\': + is_in_string = not is_in_string + if not is_in_string: + if char == '[': + opening_bracket += 1 + elif char == ']': + closing_bracket += 1 + + resp += line + if opening_bracket == closing_bracket: + break + + return loads(loads(resp)[0][2]) + + def _translate(self, text: str, destination_language: str, source_language: str) -> str: + """ + Translates the given text to the destination language with the new batchexecute API + + Heavily inspired by ssut/googletrans and https://kovatch.medium.com/deciphering-google-batchexecute-74991e4e446c + """ + request = self._request(text, destination_language, source_language) + parsed = self._parse_response(request) translated = (' ' if parsed[1][0][0][3] else '').join([part[0] for part in parsed[1][0][0][5]]) - source_language = str(source_language) + """ + try: - source_language = parsed[2] - except Exception: pass + _source_language = parsed[2] + except Exception: + pass + else: + if _source_language is not None: + source_language = _source_language - if source_language.lower() == 'auto': + if source_language == 'auto': try: source_language = parsed[0][2] - except Exception: pass + except Exception: + pass if source_language == 'auto' or source_language is None: try: source_language = parsed[0][1][1][0] - except Exception: pass + except Exception: + pass - return source_language, translated - except Exception: - return None, None + """ + return translated -def _new_language(text): - """ - Returns the language of the given text with the new batchexecute API + def _transliterate(self, text: str, destination_language: str, source_language: str) -> str: + request = self._request(text, destination_language, source_language) + parsed = self._parse_response(request) - Heavily inspired by ssut/googletrans and https://kovatch.medium.com/deciphering-google-batchexecute-74991e4e446c - """ - try: - request = _request(text, "en", "auto") - parsed = _parse_response(request) + """ - source_language = None try: source_language = parsed[2] - except Exception: pass + except Exception: + pass - if source_language == 'auto' or source_language is None: + if source_language == 'auto': try: source_language = parsed[0][2] - except Exception: pass + except Exception: + pass if source_language == 'auto' or source_language is None: try: source_language = parsed[0][1][1][0] - except Exception: pass + except Exception: + pass - return source_language - except Exception: - return None + """ -class GoogleTranslate(): - """A Python implementation of Google Translate's APIs""" - def __init__(self) -> None: try: - self.token_acquirer = TokenAcquirer() + origin_pronunciation = parsed[0][0] except Exception: - self.token_acquirer = None - - def translate(self, text, destination_language, source_language="auto") -> Union[Tuple[str, str], Tuple[None, None]]: - """ - Translates the given text to the given language + origin_pronunciation = None - Args: - text: param destination_language: - source_language: Default value = "auto") - destination_language: + return origin_pronunciation - Returns: - Tuple(str, str) --> tuple with source_lang, translation - None, None --> when an error occurs + def _language(self, text): + """ + Returns the language of the given text with the new batchexecute API + Heavily inspired by ssut/googletrans and https://kovatch.medium.com/deciphering-google-batchexecute-74991e4e446c """ + request = self._request(text, "en", "auto") + parsed = self._parse_response(request) + try: - if source_language is None: - source_language = "auto" - src, result = _new_translate(text, destination_language, source_language) - if src is not None and result is not None: - return src, result - text = quote(str(text), safe='') - request = get("https://translate.googleapis.com/translate_a/single?client=gtx&dt=t&sl=" + str(source_language) + "&tl=" + str(destination_language) + "&q=" + text) - if request.status_code < 400: - data = loads(request.text) - return data[2], "".join([sentence[0] for sentence in data[0]]) - else: - request = get("https://clients5.google.com/translate_a/t?client=dict-chrome-ex&sl=" + str(source_language) + "&tl=" + str(destination_language) + "&q=" + text, headers=HEADERS) - if request.status_code < 400: - data = loads(request.text) - try: - return data['ld_result']["srclangs"][0], "".join((sentence["trans"] if "trans" in sentence else "") for sentence in data["sentences"]) - except Exception: - try: - return data[0][0][2], "".join(sentence for sentence in data[0][0][0][0]) - except Exception: - pass - - request = get("https://translate.googleapis.com/translate_a/single?dt=t&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&dt=at&client=gtx&q=" + text + "&hl=" + str(destination_language) + "&sl=" + str(source_language) + "&tl=" + str(destination_language) + "&dj=1&source=bubble") - if request.status_code < 400: - data = loads(request.text) - src = data.get("src", None) - if src is None: - src = data.get("ld_result", {}).get("srclangs", [None])[0] - if src is None: - src = data.get("ld_result", {}).get("extended_srclangs", [None])[0] - return src, " ".join([sentence["trans"] for sentence in data["sentences"] if "trans" in sentence]) - request = get("https://translate.googleapis.com/translate_a/single?client=gtx&dt=t&dt=bd&dj=1&source=input&q=" + text + "&sl=" + str(source_language) + "&tl=" + str(destination_language)) - if request.status_code < 400: - data = loads(request.text) - return data["src"], " ".join([sentence["trans"] for sentence in data["sentences"] if "trans" in sentence]) - return None, None + source_language = parsed[2] except Exception: - return None, None + source_language = None - def transliterate(self, text, source_language=None) -> Union[Tuple[str, str], Tuple[None, None]]: - """ - Transliterate the given text - - Args: - text: param destination_language: - source_language: Default value = "auto") + if source_language == 'auto' or source_language is None: + try: + source_language = parsed[0][2] + except Exception: + pass - Returns: - Tuple(str, str) --> tuple with source_lang, translation - None, None --> when an error occurs + if source_language == 'auto' or source_language is None: + try: + source_language = parsed[0][1][1][0] + except Exception: + pass - """ - try: - if source_language is None: - source_language = "auto" - request = _request(text, "en", source_language) - parsed = _parse_response(request) + return source_language - source_language = str(source_language) - try: - source_language = parsed[2] - except Exception: pass + def _supported_languages(self): + raise UnsupportedMethod() - if source_language.lower() == 'auto': - try: - source_language = parsed[0][2] - except Exception: pass + def _example(self, text): + raise UnsupportedMethod() - if source_language == 'auto' or source_language is None: - try: - source_language = parsed[0][1][1][0] - except Exception: pass + def _dictionary(self, text, destination_language, source_language): + raise UnsupportedMethod() - origin_pronunciation = None - try: - origin_pronunciation = parsed[0][0] - except Exception: pass - - if origin_pronunciation is not None: - return source_language, origin_pronunciation - request = get("https://translate.googleapis.com/translate_a/single?dt=t&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&dt=at&client=gtx&q=" + text + "&hl=" + str(destination_language) + "&sl=" + str(source_language) + "&tl=" + str(destination_language) + "&dj=1&source=bubble") - if request.status_code < 400: - data = loads(request.text) - src = data.get("src", None) - if src is None: - src = data.get("ld_result", {}).get("srclangs", [None])[0] - if src is None: - src = data.get("ld_result", {}).get("extended_srclangs", [None])[0] - return src, " ".join([sentence["src_translit"] for sentence in data["sentences"] if "src_translit" in sentence]) - return None, None - except Exception: - return None, None + def _language_normalize(self, language): + return language.alpha2 - def define(self): - """Returns the definition of the given word""" - raise NotImplementedError + def _spellcheck(self, text, source_language): + raise UnsupportedMethod() - def text_to_speech(self, text, source_language=None) -> Union[bytes, None]: - """ - Gives back the text to speech result for the given text + def __repr__(self): + return "Google Translate" - Args: - text: - Returns: - bytes --> the mp3 file as bytes - None --> when an error occurs - """ +class GoogleTranslateV2(BaseTranslator): + """ + A Python implementation of Google Translate's APIs + """ + def __init__(self) -> None: try: - text = quote(str(text), safe='') - if source_language is None: - source_language = self.language(text) - if source_language is None: - return None - request = get("https://translate.googleapis.com/translate_tts?client=gtx&ie=UTF-8&tl=" + str(source_language) + "&q=" + text) - if request.status_code == 200: - return request.content - request = get("https://translate.google.com/translate_tts?client=tw-ob&q=" + text + "&tl=" + str(source_language)) - if request.status_code == 200: - return request.content - if self.token_acquirer is None: - return None - textlen = len(text) - token = self.token_acquirer.do(text) - if token is None: - return None - request = get("https://translate.google.com/translate_tts?ie=UTF-8&q=" + text + "&tl=" + source_language + "&total=1&idx=0&textlen=" + textlen + "&tk=" + str(token) + "&client=webapp&prev=input&ttsspeed=" + str(convert_to_float(speed))) - if request.status_code < 400: - return request.content - else: - return None + self.token_acquirer = TokenAcquirer() except Exception: - print_exc() + self.token_acquirer = None + + def _translate(self, text: str, destination_language: str, source_language: str) -> str: + params = {"client": "gtx", "dt": "t", "sl": source_language, "tl": destination_language, "q": text} + request = get("https://translate.googleapis.com/translate_a/single", params=params) + response = request.json() + # ic(response) + if request.status_code < 400: + return "".join([sentence[0] for sentence in response[0]]) + + params = {"client": "dict-chrome-ex", "sl": source_language, "tl": destination_language, "q": text} + request = get("https://clients5.google.com/translate_a/t", headers=HEADERS, params=params) + response = request.json() + if request.status_code < 400: + try: + return "".join((sentence["trans"] if "trans" in sentence else "") for sentence in response["sentences"]) + except Exception: + return "".join(sentence for sentence in response[0][0][0][0]) + + params = {"dt": ["t", "bd", "ex", "ld", "md", "qca", "rw", "rm", "ss", "t", "at"], "client": "gtx", "q": text, "hl": destination_language, "sl": source_language, "tl": destination_language, "dj": "1", "source": "bubble"} + request = get("https://translate.googleapis.com/translate_a/single", params=params) + response = request.json() + if request.status_code < 400: + return " ".join([sentence["trans"] for sentence in response["sentences"] if "trans" in sentence]) + + params = {"client": "gtx", "dt": ["t", "bd"], "dj": "1", "source": "input", "q": text, "sl": source_language, "tl": destination_language} + request = get("https://translate.googleapis.com/translate_a/single", params=params) + response = request.json() + if request.status_code < 400: + return "".join([sentence["trans"] for sentence in response["sentences"] if "trans" in sentence]) + + def _transliterate(self, text: str, destination_language: str, source_language: str) -> str: + params = {"dt": ["t", "bd", "ex", "ld", "md", "qca", "rw", "rm", "ss", "t", "at"], "client": "gtx", "q": text, "hl": destination_language, "sl": source_language, "tl": destination_language, "dj": "1", "source": "bubble"} + request = get("https://translate.googleapis.com/translate_a/single", params=params) + response = request.json() + if request.status_code < 400: + return " ".join([sentence["src_translit"] for sentence in response["sentences"] if "src_translit" in sentence]) + + # def define(self): + # """Returns the definition of the given word""" + # raise NotImplementedError + + def _text_to_speech(self, text: str, speed: int, source_language="auto") -> bytes: + if source_language == "auto": + source_language = self._language(text) + + params = {"client": "gtx", "ie": "UTF-8", "tl": source_language, "q": text} + request = get("https://translate.googleapis.com/translate_tts", params=params) + if request.status_code == 200: + return request.content + + params = {"client": "tw-ob", "q": text, "tl": source_language} + request = get("https://translate.google.com/translate_tts", params=params) + if request.status_code == 200: + return request.content + + if self.token_acquirer is None: + return None + textlen = len(text) + token = self.token_acquirer.do(text) + if token is None: return None + params = {"ie": "UTF-8", "q": text, "tl": source_language, "total": "1", "idx": "0", "textlen": textlen, "tk": token, "client": "webapp", "prev": "input", "ttsspeed": convert_to_float(speed)} + request = get("https://translate.google.com/translate_tts", params=params) + if request.status_code < 400: + return request.content - def language(self, text) -> Union[str, None]: - """ - Gives back the language of the given text + def _language(self, text: str) -> str: + params = {"client": "gtx", "dt": "t", "sl": "auto", "tl": "ja", "q": text} + request = get("https://translate.googleapis.com/translate_a/single", params=params) + response = request.json() + if request.status_code < 400: + return response[2] - Args: - text: + params = {"client": "dict-chrome-ex", "sl": "auto", "tl": "ja", "q": text} + request = get("https://clients5.google.com/translate_a/t", params=params, headers=HEADERS) + response = request.json() + if request.status_code < 400: + return response['ld_result']["srclangs"][0] - Returns: - str --> the language code - """ - try: - lang = _new_language(text) - if lang is not None: - return lang - text = quote(str(text), safe='') - request = get("https://translate.googleapis.com/translate_a/single?client=gtx&dt=t&sl=auto&tl=ja&q=" + text) - if request.status_code < 400: - return loads(request.text)[2] - else: - request = get("https://clients5.google.com/translate_a/t?client=dict-chrome-ex&sl=auto&tl=ja&q=" + text, headers=HEADERS) - if request.status_code < 400: - return loads(request.text)['ld_result']["srclangs"][0] - else: - return None - except Exception: - return None + def _supported_languages(self): + raise UnsupportedMethod() + + def _example(self, text): + raise UnsupportedMethod() + + def _dictionary(self, text, destination_language, source_language): + raise UnsupportedMethod() + + def _language_normalize(self, language): + return language.alpha2 + + def _spellcheck(self, text, source_language): + raise UnsupportedMethod() + + def __repr__(self): + return "Google Translate" diff --git a/translatepy/translators/reverso.py b/translatepy/translators/reverso.py index 983999b0..ad4c37a0 100644 --- a/translatepy/translators/reverso.py +++ b/translatepy/translators/reverso.py @@ -1,11 +1,11 @@ import requests +from translatepy.language import Language from translatepy.translators.base import BaseTranslator from translatepy.exceptions import UnsupportedMethod from icecream import ic import pyuseragents -from translatepy.utils.annotations import List, Tuple HEADERS = { # "Host": "api.reverso.net", @@ -49,12 +49,8 @@ def _transliterate(self, text: str, destination_language: str, source_language: if source_language == "auto": source_language = self._language(text) - # TODO: FIX CIRCULAR IMPORT # XXX: when sending a request, alpha3 language codes are used to translate and check the spelling of the text, and here it is alpha2 that is needed - # fixes circular import Language - from translatepy import Language - destination_language = Language(destination_language).alpha2 source_language = Language(source_language).alpha2 @@ -62,6 +58,7 @@ def _transliterate(self, text: str, destination_language: str, source_language: params = {"source_text": text, "source_lang": source_language, "target_lang": destination_language, "npage": 1, "nrows": 20, "expr_sug": 0, "json": 1, "dym_apply": "true", "pos_reorder": 5} request = requests.get(url, params=params, headers=HEADERS) response = request.json() + ic(response) if request.status_code < 400: return response["dictionary_entry_list"][0]["transliteration2"] @@ -117,9 +114,6 @@ def _supported_languages(self): def _example(self, text: str, destination_language: str, source_language: str): # TODO: nrows value - # fixes circular import Language - from translatepy import Language - destination_language = Language(destination_language).alpha2 source_language = Language(source_language).alpha2 @@ -135,9 +129,6 @@ def _example(self, text: str, destination_language: str, source_language: str): return response["list"] def _dictionary(self, text: str, destination_language: str, source_language: str): - # fixes circular import Language - from translatepy import Language - destination_language = Language(destination_language).alpha2 source_language = Language(source_language).alpha2 diff --git a/translatepy/translators/yandex.py b/translatepy/translators/yandex.py index dd80d26c..ff9781ef 100644 --- a/translatepy/translators/yandex.py +++ b/translatepy/translators/yandex.py @@ -7,7 +7,7 @@ import requests from translatepy.translators.base import BaseTranslator -from translatepy.exceptions import TranslationError, UnsupportedMethod +from translatepy.exceptions import UnsupportedMethod import uuid @@ -52,8 +52,7 @@ class YandexTranslate(BaseTranslator): Yandex Translation Implementation """ - _api_url = "https://translate.yandex.net/api/{version}/tr.json/{endpoint}" - _api_version = "v1" + _api_url = "https://translate.yandex.net/api/v1/tr.json/{endpoint}" @timed_lru_cache(120) # Store UUID value within 120 seconds def _ucid(self) -> str: @@ -74,7 +73,7 @@ def _translate(self, text: str, destination_language: str, source_language: str) if source_language == "auto": source_language = self._language(text) - url = self._api_url.format(version=self._api_version, endpoint="translate") + url = self._api_url.format(endpoint="translate") params = {"ucid": self._ucid(), "srv": "android", "format": "text"} data = {"text": text, "lang": source_language + "-" + destination_language} request = requests.post(url, headers=HEADERS, params=params, data=data) @@ -110,7 +109,7 @@ def _spellcheck(self, text: str, source_language: str) -> str: return text def _language(self, text: str): - url = self._api_url.format(version=self._api_version, endpoint="detect") + url = self._api_url.format(endpoint="detect") params = {"ucid": self._ucid(), "srv": "android"} data = {'text': text, 'hint': "en"} request = requests.get(url, params=params, data=data, headers=HEADERS) @@ -121,7 +120,7 @@ def _language(self, text: str): def _supported_languages(self): params = {"ucid": self._ucid(), "srv": "android", "ui": "en"} - url = self._api_url.format(version=self._api_version, endpoint="getLangs") + url = self._api_url.format(endpoint="getLangs") request = requests.get(url, params=params, headers=HEADERS) response = request.json() diff --git a/translatepy/utils/request.py b/translatepy/utils/request.py index 019a0cdc..dbb857a8 100644 --- a/translatepy/utils/request.py +++ b/translatepy/utils/request.py @@ -15,7 +15,7 @@ class Request(): def __init__(self, proxy_urls=None): self.session = requests.Session() - self.session.headers.update(HEADERS) + # self.session.headers.update(HEADERS) def post(self, url, *args, **kwargs): result = self.session.post(url, *args, **kwargs)