From ce056656f85e46d342223f0415c5e5ae94d3466e Mon Sep 17 00:00:00 2001
From: Marvin Wichmann <marvin@fam-wichmann.de>
Date: Wed, 2 Dec 2020 22:03:31 +0100
Subject: [PATCH] Implement new Google TTS API via dedicated library (#43863)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Joakim Sørensen <hi@ludeeus.dev>
Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
---
 .../components/google_translate/manifest.json |   2 +-
 .../components/google_translate/tts.py        | 174 +++------
 requirements_all.txt                          |   2 +-
 requirements_test_all.txt                     |   2 +-
 tests/components/google_translate/test_tts.py | 355 +++++++-----------
 5 files changed, 201 insertions(+), 334 deletions(-)

diff --git a/homeassistant/components/google_translate/manifest.json b/homeassistant/components/google_translate/manifest.json
index 6d40b2f7a09e0..c5b3edc879869 100644
--- a/homeassistant/components/google_translate/manifest.json
+++ b/homeassistant/components/google_translate/manifest.json
@@ -2,6 +2,6 @@
   "domain": "google_translate",
   "name": "Google Translate Text-to-Speech",
   "documentation": "https://www.home-assistant.io/integrations/google_translate",
-  "requirements": ["gTTS-token==1.1.4"],
+  "requirements": ["gTTS==2.2.1"],
   "codeowners": []
 }
diff --git a/homeassistant/components/google_translate/tts.py b/homeassistant/components/google_translate/tts.py
index 66c00008046e4..c9a5eef2c834f 100644
--- a/homeassistant/components/google_translate/tts.py
+++ b/homeassistant/components/google_translate/tts.py
@@ -1,78 +1,95 @@
 """Support for the Google speech service."""
-import asyncio
+from io import BytesIO
 import logging
-import re
 
-import aiohttp
-from aiohttp.hdrs import REFERER, USER_AGENT
-import async_timeout
-from gtts_token import gtts_token
+from gtts import gTTS, gTTSError
 import voluptuous as vol
 
 from homeassistant.components.tts import CONF_LANG, PLATFORM_SCHEMA, Provider
-from homeassistant.const import HTTP_OK
-from homeassistant.helpers.aiohttp_client import async_get_clientsession
 
 _LOGGER = logging.getLogger(__name__)
 
-GOOGLE_SPEECH_URL = "https://translate.google.com/translate_tts"
-MESSAGE_SIZE = 148
-
 SUPPORT_LANGUAGES = [
     "af",
-    "sq",
     "ar",
-    "hy",
     "bn",
+    "bs",
     "ca",
-    "zh",
-    "zh-cn",
-    "zh-tw",
-    "zh-yue",
-    "hr",
     "cs",
+    "cy",
     "da",
-    "nl",
+    "de",
+    "el",
     "en",
-    "en-au",
-    "en-uk",
-    "en-us",
     "eo",
+    "es",
+    "et",
     "fi",
     "fr",
-    "de",
-    "el",
+    "gu",
     "hi",
+    "hr",
     "hu",
-    "is",
+    "hy",
     "id",
+    "is",
     "it",
     "ja",
+    "jw",
+    "km",
+    "kn",
     "ko",
     "la",
     "lv",
     "mk",
+    "ml",
+    "mr",
+    "my",
+    "ne",
+    "nl",
     "no",
     "pl",
     "pt",
-    "pt-br",
     "ro",
     "ru",
-    "sr",
+    "si",
     "sk",
-    "es",
-    "es-es",
-    "es-mx",
-    "es-us",
-    "sw",
+    "sq",
+    "sr",
+    "su",
     "sv",
+    "sw",
     "ta",
+    "te",
     "th",
+    "tl",
     "tr",
-    "vi",
-    "cy",
     "uk",
-    "bg-BG",
+    "ur",
+    "vi",
+    # dialects
+    "zh-CN",
+    "zh-cn",
+    "zh-tw",
+    "en-us",
+    "en-ca",
+    "en-uk",
+    "en-gb",
+    "en-au",
+    "en-gh",
+    "en-in",
+    "en-ie",
+    "en-nz",
+    "en-ng",
+    "en-ph",
+    "en-za",
+    "en-tz",
+    "fr-ca",
+    "fr-fr",
+    "pt-br",
+    "pt-pt",
+    "es-es",
+    "es-us",
 ]
 
 DEFAULT_LANG = "en"
@@ -94,14 +111,6 @@ def __init__(self, hass, lang):
         """Init Google TTS service."""
         self.hass = hass
         self._lang = lang
-        self.headers = {
-            REFERER: "http://translate.google.com/",
-            USER_AGENT: (
-                "Mozilla/5.0 (Windows NT 10.0; WOW64) "
-                "AppleWebKit/537.36 (KHTML, like Gecko) "
-                "Chrome/47.0.2526.106 Safari/537.36"
-            ),
-        }
         self.name = "Google"
 
     @property
@@ -114,74 +123,15 @@ def supported_languages(self):
         """Return list of supported languages."""
         return SUPPORT_LANGUAGES
 
-    async def async_get_tts_audio(self, message, language, options=None):
+    def get_tts_audio(self, message, language, options=None):
         """Load TTS from google."""
+        tts = gTTS(text=message, lang=language)
+        mp3_data = BytesIO()  # in-memory buffer that receives the MP3 bytes
+
+        try:
+            tts.write_to_fp(mp3_data)
+        except gTTSError:  # exception() already logs the error and traceback
+            _LOGGER.exception("Error during processing of TTS request")
+            return None, None
 
-        token = gtts_token.Token()
-        websession = async_get_clientsession(self.hass)
-        message_parts = self._split_message_to_parts(message)
-
-        data = b""
-        for idx, part in enumerate(message_parts):
-            try:
-                part_token = await self.hass.async_add_executor_job(
-                    token.calculate_token, part
-                )
-            except ValueError as err:
-                # If token seed fetching fails.
-                _LOGGER.warning(err)
-                return None, None
-
-            url_param = {
-                "ie": "UTF-8",
-                "tl": language,
-                "q": part,
-                "tk": part_token,
-                "total": len(message_parts),
-                "idx": idx,
-                "client": "tw-ob",
-                "textlen": len(part),
-            }
-
-            try:
-                with async_timeout.timeout(10):
-                    request = await websession.get(
-                        GOOGLE_SPEECH_URL, params=url_param, headers=self.headers
-                    )
-
-                    if request.status != HTTP_OK:
-                        _LOGGER.error(
-                            "Error %d on load URL %s", request.status, request.url
-                        )
-                        return None, None
-                    data += await request.read()
-
-            except (asyncio.TimeoutError, aiohttp.ClientError):
-                _LOGGER.error("Timeout for google speech")
-                return None, None
-
-        return "mp3", data
-
-    @staticmethod
-    def _split_message_to_parts(message):
-        """Split message into single parts."""
-        if len(message) <= MESSAGE_SIZE:
-            return [message]
-
-        punc = "!()[]?.,;:"
-        punc_list = [re.escape(c) for c in punc]
-        pattern = "|".join(punc_list)
-        parts = re.split(pattern, message)
-
-        def split_by_space(fullstring):
-            """Split a string by space."""
-            if len(fullstring) > MESSAGE_SIZE:
-                idx = fullstring.rfind(" ", 0, MESSAGE_SIZE)
-                return [fullstring[:idx]] + split_by_space(fullstring[idx:])
-            return [fullstring]
-
-        msg_parts = []
-        for part in parts:
-            msg_parts += split_by_space(part)
-
-        return [msg for msg in msg_parts if len(msg) > 0]
+        return "mp3", mp3_data.getvalue()
diff --git a/requirements_all.txt b/requirements_all.txt
index 39398e7b2e934..cb93fc3e7c989 100644
--- a/requirements_all.txt
+++ b/requirements_all.txt
@@ -622,7 +622,7 @@ freesms==0.1.2
 fritzconnection==1.3.4
 
 # homeassistant.components.google_translate
-gTTS-token==1.1.4
+gTTS==2.2.1
 
 # homeassistant.components.garmin_connect
 garminconnect==0.1.16
diff --git a/requirements_test_all.txt b/requirements_test_all.txt
index 02bd6099457df..df7cbdafc63eb 100644
--- a/requirements_test_all.txt
+++ b/requirements_test_all.txt
@@ -308,7 +308,7 @@ fnvhash==0.1.0
 foobot_async==1.0.0
 
 # homeassistant.components.google_translate
-gTTS-token==1.1.4
+gTTS==2.2.1
 
 # homeassistant.components.garmin_connect
 garminconnect==0.1.16
diff --git a/tests/components/google_translate/test_tts.py b/tests/components/google_translate/test_tts.py
index 8e9ec9b7e1c7e..79c303fd2ff61 100644
--- a/tests/components/google_translate/test_tts.py
+++ b/tests/components/google_translate/test_tts.py
@@ -1,8 +1,10 @@
 """The tests for the Google speech platform."""
-import asyncio
 import os
 import shutil
 
+from gtts import gTTSError
+import pytest
+
 from homeassistant.components.media_player.const import (
     ATTR_MEDIA_CONTENT_ID,
     DOMAIN as DOMAIN_MP,
@@ -10,226 +12,141 @@
 )
 import homeassistant.components.tts as tts
 from homeassistant.config import async_process_ha_core_config
-from homeassistant.setup import setup_component
+from homeassistant.setup import async_setup_component
 
 from tests.async_mock import patch
-from tests.common import assert_setup_component, get_test_home_assistant, mock_service
+from tests.common import async_mock_service
 from tests.components.tts.test_init import mutagen_mock  # noqa: F401
 
 
-class TestTTSGooglePlatform:
-    """Test the Google speech component."""
-
-    def setup_method(self):
-        """Set up things to be run when tests are started."""
-        self.hass = get_test_home_assistant()
-
-        asyncio.run_coroutine_threadsafe(
-            async_process_ha_core_config(
-                self.hass, {"internal_url": "http://example.local:8123"}
-            ),
-            self.hass.loop,
-        )
-
-        self.url = "https://translate.google.com/translate_tts"
-        self.url_param = {
-            "tl": "en",
-            "q": "90%25%20of%20I%20person%20is%20on%20front%20of%20your%20door.",
-            "tk": 5,
-            "client": "tw-ob",
-            "textlen": 41,
-            "total": 1,
-            "idx": 0,
-            "ie": "UTF-8",
-        }
-
-    def teardown_method(self):
-        """Stop everything that was started."""
-        default_tts = self.hass.config.path(tts.DEFAULT_CACHE_DIR)
-        if os.path.isdir(default_tts):
-            shutil.rmtree(default_tts)
-
-        self.hass.stop()
-
-    def test_setup_component(self):
-        """Test setup component."""
-        config = {tts.DOMAIN: {"platform": "google_translate"}}
-
-        with assert_setup_component(1, tts.DOMAIN):
-            setup_component(self.hass, tts.DOMAIN, config)
-
-    @patch("gtts_token.gtts_token.Token.calculate_token", autospec=True, return_value=5)
-    def test_service_say(self, mock_calculate, aioclient_mock):
-        """Test service call say."""
-        calls = mock_service(self.hass, DOMAIN_MP, SERVICE_PLAY_MEDIA)
-
-        aioclient_mock.get(self.url, params=self.url_param, status=200, content=b"test")
-
-        config = {tts.DOMAIN: {"platform": "google_translate"}}
-
-        with assert_setup_component(1, tts.DOMAIN):
-            setup_component(self.hass, tts.DOMAIN, config)
-
-        self.hass.services.call(
-            tts.DOMAIN,
-            "google_translate_say",
-            {
-                "entity_id": "media_player.something",
-                tts.ATTR_MESSAGE: "90% of I person is on front of your door.",
-            },
-        )
-        self.hass.block_till_done()
-
-        assert len(calls) == 1
-        assert len(aioclient_mock.mock_calls) == 1
-        assert calls[0].data[ATTR_MEDIA_CONTENT_ID].find(".mp3") != -1
-
-    @patch("gtts_token.gtts_token.Token.calculate_token", autospec=True, return_value=5)
-    def test_service_say_german_config(self, mock_calculate, aioclient_mock):
-        """Test service call say with german code in the config."""
-        calls = mock_service(self.hass, DOMAIN_MP, SERVICE_PLAY_MEDIA)
-
-        self.url_param["tl"] = "de"
-        aioclient_mock.get(self.url, params=self.url_param, status=200, content=b"test")
-
-        config = {tts.DOMAIN: {"platform": "google_translate", "language": "de"}}
-
-        with assert_setup_component(1, tts.DOMAIN):
-            setup_component(self.hass, tts.DOMAIN, config)
-
-        self.hass.services.call(
-            tts.DOMAIN,
-            "google_translate_say",
-            {
-                "entity_id": "media_player.something",
-                tts.ATTR_MESSAGE: "90% of I person is on front of your door.",
-            },
-        )
-        self.hass.block_till_done()
-
-        assert len(calls) == 1
-        assert len(aioclient_mock.mock_calls) == 1
-
-    @patch("gtts_token.gtts_token.Token.calculate_token", autospec=True, return_value=5)
-    def test_service_say_german_service(self, mock_calculate, aioclient_mock):
-        """Test service call say with german code in the service."""
-        calls = mock_service(self.hass, DOMAIN_MP, SERVICE_PLAY_MEDIA)
-
-        self.url_param["tl"] = "de"
-        aioclient_mock.get(self.url, params=self.url_param, status=200, content=b"test")
-
-        config = {
-            tts.DOMAIN: {"platform": "google_translate", "service_name": "google_say"}
-        }
-
-        with assert_setup_component(1, tts.DOMAIN):
-            setup_component(self.hass, tts.DOMAIN, config)
-
-        self.hass.services.call(
-            tts.DOMAIN,
-            "google_say",
-            {
-                "entity_id": "media_player.something",
-                tts.ATTR_MESSAGE: "90% of I person is on front of your door.",
-                tts.ATTR_LANGUAGE: "de",
-            },
-        )
-        self.hass.block_till_done()
-
-        assert len(calls) == 1
-        assert len(aioclient_mock.mock_calls) == 1
-
-    @patch("gtts_token.gtts_token.Token.calculate_token", autospec=True, return_value=5)
-    def test_service_say_error(self, mock_calculate, aioclient_mock):
-        """Test service call say with http response 400."""
-        calls = mock_service(self.hass, DOMAIN_MP, SERVICE_PLAY_MEDIA)
-
-        aioclient_mock.get(self.url, params=self.url_param, status=400, content=b"test")
-
-        config = {tts.DOMAIN: {"platform": "google_translate"}}
-
-        with assert_setup_component(1, tts.DOMAIN):
-            setup_component(self.hass, tts.DOMAIN, config)
-
-        self.hass.services.call(
-            tts.DOMAIN,
-            "google_translate_say",
-            {
-                "entity_id": "media_player.something",
-                tts.ATTR_MESSAGE: "90% of I person is on front of your door.",
-            },
-        )
-        self.hass.block_till_done()
-
-        assert len(calls) == 0
-        assert len(aioclient_mock.mock_calls) == 1
-
-    @patch("gtts_token.gtts_token.Token.calculate_token", autospec=True, return_value=5)
-    def test_service_say_timeout(self, mock_calculate, aioclient_mock):
-        """Test service call say with http timeout."""
-        calls = mock_service(self.hass, DOMAIN_MP, SERVICE_PLAY_MEDIA)
-
-        aioclient_mock.get(self.url, params=self.url_param, exc=asyncio.TimeoutError())
-
-        config = {tts.DOMAIN: {"platform": "google_translate"}}
-
-        with assert_setup_component(1, tts.DOMAIN):
-            setup_component(self.hass, tts.DOMAIN, config)
-
-        self.hass.services.call(
-            tts.DOMAIN,
-            "google_translate_say",
-            {
-                "entity_id": "media_player.something",
-                tts.ATTR_MESSAGE: "90% of I person is on front of your door.",
-            },
-        )
-        self.hass.block_till_done()
-
-        assert len(calls) == 0
-        assert len(aioclient_mock.mock_calls) == 1
-
-    @patch("gtts_token.gtts_token.Token.calculate_token", autospec=True, return_value=5)
-    def test_service_say_long_size(self, mock_calculate, aioclient_mock):
-        """Test service call say with a lot of text."""
-        calls = mock_service(self.hass, DOMAIN_MP, SERVICE_PLAY_MEDIA)
-
-        self.url_param["total"] = 9
-        self.url_param["q"] = "I%20person%20is%20on%20front%20of%20your%20door"
-        self.url_param["textlen"] = 33
-        for idx in range(9):
-            self.url_param["idx"] = idx
-            aioclient_mock.get(
-                self.url, params=self.url_param, status=200, content=b"test"
-            )
-
-        config = {
-            tts.DOMAIN: {"platform": "google_translate", "service_name": "google_say"}
-        }
-
-        with assert_setup_component(1, tts.DOMAIN):
-            setup_component(self.hass, tts.DOMAIN, config)
-
-        self.hass.services.call(
-            tts.DOMAIN,
-            "google_say",
-            {
-                "entity_id": "media_player.something",
-                tts.ATTR_MESSAGE: (
-                    "I person is on front of your door."
-                    "I person is on front of your door."
-                    "I person is on front of your door."
-                    "I person is on front of your door."
-                    "I person is on front of your door."
-                    "I person is on front of your door."
-                    "I person is on front of your door."
-                    "I person is on front of your door."
-                    "I person is on front of your door."
-                ),
-            },
-        )
-        self.hass.block_till_done()
-
-        assert len(calls) == 1
-        assert len(aioclient_mock.mock_calls) == 9
-        assert calls[0].data[ATTR_MEDIA_CONTENT_ID].find(".mp3") != -1
+@pytest.fixture(autouse=True)
+def cleanup_cache(hass):
+    """Remove the default TTS cache directory once each test has finished."""
+    yield  # the test body runs here; the cleanup below follows it
+    default_tts = hass.config.path(tts.DEFAULT_CACHE_DIR)
+    if os.path.isdir(default_tts):
+        shutil.rmtree(default_tts)
+
+
+@pytest.fixture
+async def calls(hass):
+    """Mock the media player SERVICE_PLAY_MEDIA service; tests inspect the result."""
+    return async_mock_service(hass, DOMAIN_MP, SERVICE_PLAY_MEDIA)
+
+
+@pytest.fixture(autouse=True)
+async def setup_internal_url(hass):
+    """Process a core config that sets a fixed internal URL for every test."""
+    await async_process_ha_core_config(
+        hass, {"internal_url": "http://example.local:8123"}
+    )
+
+
+@pytest.fixture
+def mock_gtts():
+    """Patch the gTTS class where the google_translate TTS platform imports it."""
+    with patch("homeassistant.components.google_translate.tts.gTTS") as mock_gtts:
+        yield mock_gtts
+
+
+async def test_service_say(hass, mock_gtts, calls):
+    """Test the say service with the default language."""
+
+    await async_setup_component(
+        hass, tts.DOMAIN, {tts.DOMAIN: {"platform": "google_translate"}}
+    )
+
+    await hass.services.async_call(
+        tts.DOMAIN,
+        "google_translate_say",
+        {
+            "entity_id": "media_player.something",
+            tts.ATTR_MESSAGE: "There is a person at the front door.",
+        },
+        blocking=True,
+    )
+
+    assert len(calls) == 1
+    assert len(mock_gtts.mock_calls) == 2  # gTTS(...) and .write_to_fp(...)
+    assert calls[0].data[ATTR_MEDIA_CONTENT_ID].find(".mp3") != -1
+
+    assert mock_gtts.mock_calls[0][2] == {  # kwargs of the gTTS(...) call
+        "text": "There is a person at the front door.",
+        "lang": "en",
+    }
+
+
+async def test_service_say_german_config(hass, mock_gtts, calls):
+    """Test the say service with German set as the language in the config."""
+
+    await async_setup_component(
+        hass,
+        tts.DOMAIN,
+        {tts.DOMAIN: {"platform": "google_translate", "language": "de"}},
+    )
+
+    await hass.services.async_call(
+        tts.DOMAIN,
+        "google_translate_say",
+        {
+            "entity_id": "media_player.something",
+            tts.ATTR_MESSAGE: "There is a person at the front door.",
+        },
+        blocking=True,
+    )
+
+    assert len(calls) == 1
+    assert len(mock_gtts.mock_calls) == 2  # gTTS(...) and .write_to_fp(...)
+    assert mock_gtts.mock_calls[0][2] == {  # kwargs of the gTTS(...) call
+        "text": "There is a person at the front door.",
+        "lang": "de",
+    }
+
+
+async def test_service_say_german_service(hass, mock_gtts, calls):
+    """Test the say service with German given as the service call language."""
+
+    config = {
+        tts.DOMAIN: {"platform": "google_translate", "service_name": "google_say"}
+    }
+
+    await async_setup_component(hass, tts.DOMAIN, config)
+
+    await hass.services.async_call(
+        tts.DOMAIN,
+        "google_say",
+        {
+            "entity_id": "media_player.something",
+            tts.ATTR_MESSAGE: "There is a person at the front door.",
+            tts.ATTR_LANGUAGE: "de",
+        },
+        blocking=True,
+    )
+
+    assert len(calls) == 1
+    assert len(mock_gtts.mock_calls) == 2  # gTTS(...) and .write_to_fp(...)
+    assert mock_gtts.mock_calls[0][2] == {  # kwargs of the gTTS(...) call
+        "text": "There is a person at the front door.",
+        "lang": "de",
+    }
+
+
+async def test_service_say_error(hass, mock_gtts, calls):
+    """Test that no media is played when gTTS raises gTTSError."""
+    mock_gtts.return_value.write_to_fp.side_effect = gTTSError
+    await async_setup_component(
+        hass, tts.DOMAIN, {tts.DOMAIN: {"platform": "google_translate"}}
+    )
+
+    await hass.services.async_call(
+        tts.DOMAIN,
+        "google_translate_say",
+        {
+            "entity_id": "media_player.something",
+            tts.ATTR_MESSAGE: "There is a person at the front door.",
+        },
+        blocking=True,
+    )
+
+    assert len(calls) == 0
+    assert len(mock_gtts.mock_calls) == 2  # gTTS(...) and failing .write_to_fp(...)