Implement new Google TTS API via dedicated library (#43863)

Co-authored-by: Joakim Sørensen <hi@ludeeus.dev>
Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
home-assistant · Dec 2, 2020 · ce05665
1 parent 30baf33
commit ce05665
Show file tree

Hide file tree

Showing 5 changed files with 201 additions and 334 deletions.
diff --git a/homeassistant/components/google_translate/manifest.json b/homeassistant/components/google_translate/manifest.json
@@ -2,6 +2,6 @@
   "domain": "google_translate",
   "name": "Google Translate Text-to-Speech",
   "documentation": "https://www.home-assistant.io/integrations/google_translate",
-  "requirements": ["gTTS-token==1.1.4"],
+  "requirements": ["gTTS==2.2.1"],
   "codeowners": []
 }
diff --git a/homeassistant/components/google_translate/tts.py b/homeassistant/components/google_translate/tts.py
@@ -1,78 +1,95 @@
 """Support for the Google speech service."""
-import asyncio
+from io import BytesIO
 import logging
-import re
 
-import aiohttp
-from aiohttp.hdrs import REFERER, USER_AGENT
-import async_timeout
-from gtts_token import gtts_token
+from gtts import gTTS, gTTSError
 import voluptuous as vol
 
 from homeassistant.components.tts import CONF_LANG, PLATFORM_SCHEMA, Provider
-from homeassistant.const import HTTP_OK
-from homeassistant.helpers.aiohttp_client import async_get_clientsession
 
 _LOGGER = logging.getLogger(__name__)
 
-GOOGLE_SPEECH_URL = "https://translate.google.com/translate_tts"
-MESSAGE_SIZE = 148
-
 SUPPORT_LANGUAGES = [
     "af",
-    "sq",
     "ar",
-    "hy",
     "bn",
+    "bs",
     "ca",
-    "zh",
-    "zh-cn",
-    "zh-tw",
-    "zh-yue",
-    "hr",
     "cs",
+    "cy",
     "da",
-    "nl",
+    "de",
+    "el",
     "en",
-    "en-au",
-    "en-uk",
-    "en-us",
     "eo",
+    "es",
+    "et",
     "fi",
     "fr",
-    "de",
-    "el",
+    "gu",
     "hi",
+    "hr",
     "hu",
-    "is",
+    "hy",
     "id",
+    "is",
     "it",
     "ja",
+    "jw",
+    "km",
+    "kn",
     "ko",
     "la",
     "lv",
     "mk",
+    "ml",
+    "mr",
+    "my",
+    "ne",
+    "nl",
     "no",
     "pl",
     "pt",
-    "pt-br",
     "ro",
     "ru",
-    "sr",
+    "si",
     "sk",
-    "es",
-    "es-es",
-    "es-mx",
-    "es-us",
-    "sw",
+    "sq",
+    "sr",
+    "su",
     "sv",
+    "sw",
     "ta",
+    "te",
     "th",
+    "tl",
     "tr",
-    "vi",
-    "cy",
     "uk",
-    "bg-BG",
+    "ur",
+    "vi",
+    # dialects
+    "zh-CN",
+    "zh-cn",
+    "zh-tw",
+    "en-us",
+    "en-ca",
+    "en-uk",
+    "en-gb",
+    "en-au",
+    "en-gh",
+    "en-in",
+    "en-ie",
+    "en-nz",
+    "en-ng",
+    "en-ph",
+    "en-za",
+    "en-tz",
+    "fr-ca",
+    "fr-fr",
+    "pt-br",
+    "pt-pt",
+    "es-es",
+    "es-us",
 ]
 
 DEFAULT_LANG = "en"
@@ -94,14 +111,6 @@ def __init__(self, hass, lang):
         """Init Google TTS service."""
         self.hass = hass
         self._lang = lang
-        self.headers = {
-            REFERER: "http://translate.google.com/",
-            USER_AGENT: (
-                "Mozilla/5.0 (Windows NT 10.0; WOW64) "
-                "AppleWebKit/537.36 (KHTML, like Gecko) "
-                "Chrome/47.0.2526.106 Safari/537.36"
-            ),
-        }
         self.name = "Google"
 
     @property
@@ -114,74 +123,15 @@ def supported_languages(self):
         """Return list of supported languages."""
         return SUPPORT_LANGUAGES
 
-    async def async_get_tts_audio(self, message, language, options=None):
+    def get_tts_audio(self, message, language, options=None):
         """Load TTS from google."""
+        tts = gTTS(text=message, lang=language)
+        mp3_data = BytesIO()
+
+        try:
+            tts.write_to_fp(mp3_data)
+        except gTTSError as exc:
+            _LOGGER.exception("Error during processing of TTS request %s", exc)
+            return None, None
 
-        token = gtts_token.Token()
-        websession = async_get_clientsession(self.hass)
-        message_parts = self._split_message_to_parts(message)
-
-        data = b""
-        for idx, part in enumerate(message_parts):
-            try:
-                part_token = await self.hass.async_add_executor_job(
-                    token.calculate_token, part
-                )
-            except ValueError as err:
-                # If token seed fetching fails.
-                _LOGGER.warning(err)
-                return None, None
-
-            url_param = {
-                "ie": "UTF-8",
-                "tl": language,
-                "q": part,
-                "tk": part_token,
-                "total": len(message_parts),
-                "idx": idx,
-                "client": "tw-ob",
-                "textlen": len(part),
-            }
-
-            try:
-                with async_timeout.timeout(10):
-                    request = await websession.get(
-                        GOOGLE_SPEECH_URL, params=url_param, headers=self.headers
-                    )
-
-                    if request.status != HTTP_OK:
-                        _LOGGER.error(
-                            "Error %d on load URL %s", request.status, request.url
-                        )
-                        return None, None
-                    data += await request.read()
-
-            except (asyncio.TimeoutError, aiohttp.ClientError):
-                _LOGGER.error("Timeout for google speech")
-                return None, None
-
-        return "mp3", data
-
-    @staticmethod
-    def _split_message_to_parts(message):
-        """Split message into single parts."""
-        if len(message) <= MESSAGE_SIZE:
-            return [message]
-
-        punc = "!()[]?.,;:"
-        punc_list = [re.escape(c) for c in punc]
-        pattern = "|".join(punc_list)
-        parts = re.split(pattern, message)
-
-        def split_by_space(fullstring):
-            """Split a string by space."""
-            if len(fullstring) > MESSAGE_SIZE:
-                idx = fullstring.rfind(" ", 0, MESSAGE_SIZE)
-                return [fullstring[:idx]] + split_by_space(fullstring[idx:])
-            return [fullstring]
-
-        msg_parts = []
-        for part in parts:
-            msg_parts += split_by_space(part)
-
-        return [msg for msg in msg_parts if len(msg) > 0]
+        return "mp3", mp3_data.getvalue()
diff --git a/requirements_all.txt b/requirements_all.txt
@@ -622,7 +622,7 @@ freesms==0.1.2
 fritzconnection==1.3.4
 
 # homeassistant.components.google_translate
-gTTS-token==1.1.4
+gTTS==2.2.1
 
 # homeassistant.components.garmin_connect
 garminconnect==0.1.16

diff --git a/requirements_test_all.txt b/requirements_test_all.txt
@@ -308,7 +308,7 @@ fnvhash==0.1.0
 foobot_async==1.0.0
 
 # homeassistant.components.google_translate
-gTTS-token==1.1.4
+gTTS==2.2.1
 
 # homeassistant.components.garmin_connect
 garminconnect==0.1.16