Add text-to-speech beta samples [(#1421)](#1421)

GoogleCloudPlatform · May 20, 2020 · af26bb6 · af26bb6
commit af26bb6
Show file tree

Hide file tree

Showing 10 changed files with 420 additions and 0 deletions.
diff --git a/texttospeech/snippets/list_voices.py b/texttospeech/snippets/list_voices.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Text-To-Speech API sample application.
+
+Example usage:
+    python list_voices.py
+"""
+
+
+# [START tts_list_voices]
+def list_voices():
+    """Lists the available voices."""
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    # Performs the list voices request
+    voices = client.list_voices()
+
+    for voice in voices.voices:
+        # Display the voice's name. Example: tpc-vocoded
+        print('Name: {}'.format(voice.name))
+
+        # Display the supported language codes for this voice. Example: "en-US"
+        for language_code in voice.language_codes:
+            print('Supported language: {}'.format(language_code))
+
+        # SSML Voice Gender values from google.cloud.texttospeech.enums
+        ssml_voice_genders = ['SSML_VOICE_GENDER_UNSPECIFIED', 'MALE',
+                              'FEMALE', 'NEUTRAL']
+
+        # Display the SSML Voice Gender
+        print('SSML Voice Gender: {}'.format(
+            ssml_voice_genders[voice.ssml_gender]))
+
+        # Display the natural sample rate hertz for this voice. Example: 24000
+        print('Natural Sample Rate Hertz: {}\n'.format(
+            voice.natural_sample_rate_hertz))
+# [END tts_list_voices]
+
+
+# Run the sample only when this file is executed as a script.
+if __name__ == '__main__':
+    list_voices()
diff --git a/texttospeech/snippets/list_voices_test.py b/texttospeech/snippets/list_voices_test.py
@@ -0,0 +1,23 @@
+# Copyright 2018, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import list_voices
+
+
+def test_list_voices(capsys):
+    list_voices.list_voices()
+    out, err = capsys.readouterr()
+
+    assert 'en-US' in out
+    assert 'SSML Voice Gender: MALE' in out
+    assert 'SSML Voice Gender: FEMALE' in out
diff --git a/texttospeech/snippets/quickstart.py b/texttospeech/snippets/quickstart.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Text-To-Speech API sample application.
+
+Example usage:
+    python quickstart.py
+"""
+
+
+def run_quickstart():
+    # [START tts_quickstart]
+    """Synthesizes speech from the input string of text or ssml.
+
+    Note: ssml must be well-formed according to:
+        https://www.w3.org/TR/speech-synthesis/
+    """
+    from google.cloud import texttospeech
+
+    # Instantiates a client
+    client = texttospeech.TextToSpeechClient()
+
+    # Set the text input to be synthesized
+    synthesis_input = texttospeech.types.SynthesisInput(text="Hello, World!")
+
+    # Build the voice request, select the language code ("en-US") and the ssml
+    # voice gender ("neutral")
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
+
+    # Select the type of audio file you want returned
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    # Perform the text-to-speech request on the text input with the selected
+    # voice parameters and audio file type
+    response = client.synthesize_speech(synthesis_input, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        # Write the response to the output file.
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+    # [END tts_quickstart]
+
+
+# Run the quickstart only when this file is executed as a script.
+if __name__ == '__main__':
+    run_quickstart()
diff --git a/texttospeech/snippets/requirements.txt b/texttospeech/snippets/requirements.txt
@@ -0,0 +1 @@
+google-cloud-texttospeech==0.1.0
diff --git a/texttospeech/snippets/resources/hello.ssml b/texttospeech/snippets/resources/hello.ssml
@@ -0,0 +1 @@
+<speak>Hello there.</speak>
diff --git a/texttospeech/snippets/resources/hello.txt b/texttospeech/snippets/resources/hello.txt
@@ -0,0 +1 @@
+Hello there!
diff --git a/texttospeech/snippets/synthesize_file.py b/texttospeech/snippets/synthesize_file.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Text-To-Speech API sample application.
+
+Example usage:
+    python synthesize_file.py --text resources/hello.txt
+    python synthesize_file.py --ssml resources/hello.ssml
+"""
+
+import argparse
+
+
+# [START tts_synthesize_text_file]
+def synthesize_text_file(text_file):
+    """Synthesizes speech from the input file of text."""
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    with open(text_file, 'r') as f:
+        text = f.read()
+        input_text = texttospeech.types.SynthesisInput(text=text)
+
+    # Note: the voice can also be specified by name.
+    # Names of voices can be retrieved with client.list_voices().
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
+
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    response = client.synthesize_speech(input_text, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+# [END tts_synthesize_text_file]
+
+
+# [START tts_synthesize_ssml_file]
+def synthesize_ssml_file(ssml_file):
+    """Synthesizes speech from the input file of ssml.
+
+    Note: ssml must be well-formed according to:
+        https://www.w3.org/TR/speech-synthesis/
+    """
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    with open(ssml_file, 'r') as f:
+        ssml = f.read()
+        input_text = texttospeech.types.SynthesisInput(ssml=ssml)
+
+    # Note: the voice can also be specified by name.
+    # Names of voices can be retrieved with client.list_voices().
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
+
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    response = client.synthesize_speech(input_text, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+# [END tts_synthesize_ssml_file]
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--text',
+                       help='The text file from which to synthesize speech.')
+    group.add_argument('--ssml',
+                       help='The ssml file from which to synthesize speech.')
+
+    args = parser.parse_args()
+
+    if args.text:
+        synthesize_text_file(args.text)
+    else:
+        synthesize_ssml_file(args.ssml)
diff --git a/texttospeech/snippets/synthesize_file_test.py b/texttospeech/snippets/synthesize_file_test.py
@@ -0,0 +1,37 @@
+# Copyright 2018, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import synthesize_file
+
+TEXT_FILE = 'resources/hello.txt'
+SSML_FILE = 'resources/hello.ssml'
+
+
+def test_synthesize_text_file(capsys):
+    synthesize_file.synthesize_text_file(text_file=TEXT_FILE)
+    out, err = capsys.readouterr()
+
+    assert 'Audio content written to file' in out
+    statinfo = os.stat('output.mp3')
+    assert statinfo.st_size > 0
+
+
+def test_synthesize_ssml_file(capsys):
+    synthesize_file.synthesize_ssml_file(ssml_file=SSML_FILE)
+    out, err = capsys.readouterr()
+
+    assert 'Audio content written to file' in out
+    statinfo = os.stat('output.mp3')
+    assert statinfo.st_size > 0
diff --git a/texttospeech/snippets/synthesize_text.py b/texttospeech/snippets/synthesize_text.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Text-To-Speech API sample application.
+
+Example usage:
+    python synthesize_text.py --text "hello"
+    python synthesize_text.py --ssml "<speak>Hello there.</speak>"
+"""
+
+import argparse
+
+
+# [START tts_synthesize_text]
+def synthesize_text(text):
+    """Synthesizes speech from the input string of text."""
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    input_text = texttospeech.types.SynthesisInput(text=text)
+
+    # Note: the voice can also be specified by name.
+    # Names of voices can be retrieved with client.list_voices().
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
+
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    response = client.synthesize_speech(input_text, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+# [END tts_synthesize_text]
+
+
+# [START tts_synthesize_ssml]
+def synthesize_ssml(ssml):
+    """Synthesizes speech from the input string of ssml.
+
+    Note: ssml must be well-formed according to:
+        https://www.w3.org/TR/speech-synthesis/
+
+    Example: <speak>Hello there.</speak>
+    """
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    input_text = texttospeech.types.SynthesisInput(ssml=ssml)
+
+    # Note: the voice can also be specified by name.
+    # Names of voices can be retrieved with client.list_voices().
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
+
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    response = client.synthesize_speech(input_text, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+# [END tts_synthesize_ssml]
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--text',
+                       help='The text from which to synthesize speech.')
+    group.add_argument('--ssml',
+                       help='The ssml string from which to synthesize speech.')
+
+    args = parser.parse_args()
+
+    if args.text:
+        synthesize_text(args.text)
+    else:
+        synthesize_ssml(args.ssml)