Skip to content

Commit

Permalink
Implementing a new Bing Translate implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhymabekRoman committed May 27, 2021
1 parent faf4b27 commit 80ce159
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 41 deletions.
12 changes: 12 additions & 0 deletions stress_test_bing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from translatepy.translators.bing import BingTranslate
from time import sleep

# Stress test: translate two short phrases to Russian in an endless loop,
# printing every result. Stop it manually (Ctrl+C).
dl = BingTranslate()
while True:
    for phrase in ("hello", "hello, world!"):
        print(dl.translate(phrase, "russian"))

    # NOTE(review): presumably clears cached translations so the next
    # iteration reaches Bing again -- confirm against BaseTranslator.
    dl.clean_cache()
107 changes: 66 additions & 41 deletions translatepy/translators/bing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from typing import Union
"""
This implementation was written specifically for translatepy by 'Zhymabek Roman', based on the 'Anime no Sekai' version.
"""

import re
import json
import requests

import pyuseragents

from translatepy.translators.base import BaseTranslator
from translatepy.exceptions import UnsupportedMethod
from translatepy.utils.annotations import Tuple
Expand All @@ -20,8 +25,6 @@
"Connection": "keep-alive"
}

PARAMS = {'IID': 'translator.5033.3'}

# TODO: read documentation: https://docs.microsoft.com/ru-ru/azure/cognitive-services/translator/language-support

class Example():
Expand Down Expand Up @@ -59,11 +62,49 @@ def __repr__(self) -> str:
return str(self.source)


class BingSessionManager():
    """
    Holds the authorization data scraped from the Bing Translator web page
    (IG, IID, key and token) and attaches it to every request sent to the
    Bing endpoints, refreshing it when Bing answers with a 400 status code.
    """

    def __init__(self):
        self._parse_authorization_data()

    @staticmethod
    def _extract_authorization_data(page):
        """
        Extracts the authorization values from the Bing Translator page source

        Args:
          page: the HTML/JS source of https://www.bing.com/translator

        Returns:
            Tuple(str, str, Any, Any) --> tuple with IG, IID, key, token

        Raises:
            IndexError: if one of the expected markers is missing from the page
        """
        ig = re.findall(r'IG:"(.*?)"', page)[0]
        iid = re.findall(r'data-iid="(.*?)"', page)[0]
        # The helper parameters are a JSON array: the first item is the key,
        # the second one is the token. Decode it only once.
        helper_info = json.loads(re.findall(r"params_RichTranslateHelper = (.*?);", page)[0])
        return ig, iid, helper_info[0], helper_info[1]

    def _parse_authorization_data(self):
        """
        Downloads the Bing Translator page and stores the authorization values
        found in it on the instance (self.ig, self.iid, self.key, self.token)
        """
        page = requests.get("https://www.bing.com/translator").text
        self.ig, self.iid, self.key, self.token = self._extract_authorization_data(page)

    def _request_params(self):
        """
        Builds the query string parameters sent along with every request

        Returns:
            dict --> the 'IG' and 'IID' parameters
        """
        # The IID is the scraped "data-iid" value followed by a numeric suffix.
        # It used to be the literal text "self.iid.1" because the attribute
        # name was written inside the string instead of being interpolated.
        return {'IG': self.ig, 'IID': "{}.{}".format(self.iid, 1)}

    def send(self, url, data, max_retries=5):
        """
        Sends a POST request to the given Bing endpoint with the
        authorization data added to the form data

        Args:
          url: the endpoint URL
          data: the endpoint specific form fields
          max_retries: how many times the authorization data is refreshed
                       and the request repeated after a 400 status code

        Returns:
            the decoded JSON response

        Raises:
            RuntimeError: if Bing keeps answering with a 400 status code
        """
        for _ in range(max_retries + 1):
            payload = {'token': self.token, 'key': self.key}
            payload.update(data)

            request = requests.post(url, params=self._request_params(), data=payload, headers=HEADERS)
            response = request.json()

            if isinstance(response, dict) and response.get("statusCode", 200) == 400:
                # Rejected request: fetch fresh authorization data and retry
                self._parse_authorization_data()
                continue

            return response

        # A 400 that survives fresh credentials will not go away by retrying;
        # stop here instead of requesting the page forever.
        raise RuntimeError("Bing kept rejecting the request to {} after {} retries".format(url, max_retries))


class BingTranslate(BaseTranslator):
"""
A Python implementation of Microsoft Bing Translation's APIs
"""

def __init__(self):
    """
    Creates the session manager used by every request of this translator
    """
    # NOTE(review): the base class initializer is not called here -- confirm
    # that BaseTranslator does not need one.
    self.session_manager = BingSessionManager()

def _translate(self, text: str, destination_language: str, source_language: str) -> str:
"""
Translates the given text to the given language
Expand All @@ -77,14 +118,8 @@ def _translate(self, text: str, destination_language: str, source_language: str)
Tuple(str, str) --> tuple with source_lang, translation
"""
ic(text)
ic(destination_language)
ic(source_language)
request = requests.post("https://www.bing.com/ttranslatev3", headers=HEADERS, params=PARAMS, data={'text': text, 'fromLang': source_language, 'to': destination_language})
response = request.json()
ic(response)
if request.status_code < 400:
return response[0]["translations"][0]["text"]
response = self.session_manager.send("https://www.bing.com/ttranslatev3", data={'text': text, 'fromLang': source_language, 'to': destination_language})
return response[0]["translations"][0]["text"]

def _example(self, text, destination_language, source_language, translation) -> str:
"""
Expand All @@ -102,13 +137,11 @@ def _example(self, text, destination_language, source_language, translation) ->
if translation is None:
source_language, translation = self.translate(text, destination_language, source_language)

if source_language == "auto-detect":
source_language = self._language(text)
if source_language == "auto-detect":
source_language = self._language(text)

request = requests.post("https://www.bing.com/texamplev3", headers=HEADERS, params=PARAMS, data={'text': text.lower(), 'from': source_language, 'to': destination_language, 'translation': translation.lower()})
response = requests.json()
if request.status_code < 400:
return [Example(example) for example in response[0]["examples"]]
response = self.session_manager.send("https://www.bing.com/texamplev3", data={'text': text.lower(), 'from': source_language, 'to': destination_language, 'translation': translation.lower()})
return [Example(example) for example in response[0]["examples"]]

def _spellcheck(self, text: str, source_language: str) -> str:
"""
Expand All @@ -123,14 +156,12 @@ def _spellcheck(self, text: str, source_language: str) -> str:
"""
if source_language == "auto-detect":
source_language = self._language(text)
request = requests.post("https://www.bing.com/tspellcheckv3", headers=HEADERS, params=PARAMS, data={'text': text, 'fromLang': source_language})
response = request.json()
ic(response)
if request.status_code < 400:
result = response["correctedText"]
if result == "":
return text
return result

response = self.session_manager.send("https://www.bing.com/tspellcheckv3", data={'text': text, 'fromLang': source_language})
result = response["correctedText"]
if result == "":
return text
return result

def _language(self, text: str) -> str:
"""
Expand All @@ -143,25 +174,19 @@ def _language(self, text: str) -> str:
str --> the language code
"""
request = requests.post("https://www.bing.com/ttranslatev3", headers=HEADERS, params=PARAMS, data={'text': text, 'fromLang': "auto-detect", 'to': "en"})
response = request.json()
ic(response)
if request.status_code < 400:
return response[0]["detectedLanguage"]["language"]
response = self.session_manager.send("https://www.bing.com/ttranslatev3", data={'text': text, 'fromLang': "auto-detect", 'to': "en"})
return response[0]["detectedLanguage"]["language"]

def _transliterate(self, text: str, destination_language: str, source_language: str):
    """
    Returns the transliteration reported by the Bing translate endpoint

    Args:
      text: the text sent to Bing
      destination_language: the language code sent as 'to'
      source_language: the language code sent as 'fromLang'

    Returns:
        the "inputTransliteration" entry of the response when Bing sends a
        second item, otherwise the transliteration text of the first translation
    """
    # TODO: alternative implementation using the dedicated endpoint
    # (https://www.bing.com/ttransliteratev3 with 'language' and 'toScript'
    # form fields), won't work

    # Single request through the session manager, which adds the
    # authorization data; the former direct request is gone.
    response = self.session_manager.send("https://www.bing.com/ttranslatev3", data={'text': text, 'fromLang': source_language, 'to': destination_language})
    # XXX: Not a predictable response from Bing Translate
    try:
        return response[1]["inputTransliteration"]
    except IndexError:
        return response[0]["translations"][0]["transliteration"]["text"]

def _dictionary(self, text: str, destination_language: str, source_language: str):
# TODO: Implement
Expand Down

0 comments on commit 80ce159

Please sign in to comment.