Skip to content

Commit

Permalink
Add text-to-speech beta samples [(#1421)](#1421)
Browse files Browse the repository at this point in the history
  • Loading branch information
nnegrey authored and busunkim96 committed May 20, 2020
0 parents commit af26bb6
Show file tree
Hide file tree
Showing 10 changed files with 420 additions and 0 deletions.
56 changes: 56 additions & 0 deletions texttospeech/snippets/list_voices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python

# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Text-To-Speech API sample application.
Example usage:
python list_voices.py
"""


# [START tts_list_voices]
def list_voices():
    """Lists the available voices.

    Sends a list-voices request to the Text-to-Speech API and prints, for
    every voice in the response, its name, each supported language code,
    its SSML voice gender and its natural sample rate in hertz.
    """
    from google.cloud import texttospeech
    client = texttospeech.TextToSpeechClient()

    # SSML Voice Gender values from google.cloud.texttospeech.enums, indexed
    # by their numeric value. Built once here rather than on every loop
    # iteration, since it never changes.
    ssml_voice_genders = ('SSML_VOICE_GENDER_UNSPECIFIED', 'MALE',
                          'FEMALE', 'NEUTRAL')

    # Performs the list voices request
    voices = client.list_voices()

    for voice in voices.voices:
        # Display the voice's name. Example: tpc-vocoded
        print('Name: {}'.format(voice.name))

        # Display the supported language codes for this voice. Example: "en-US"
        for language_code in voice.language_codes:
            print('Supported language: {}'.format(language_code))

        # Display the SSML Voice Gender. Guard the table lookup so a value
        # outside the table is reported as-is instead of raising IndexError
        # (or, for a negative value, silently picking the wrong entry).
        gender_value = voice.ssml_gender
        if 0 <= gender_value < len(ssml_voice_genders):
            gender_name = ssml_voice_genders[gender_value]
        else:
            gender_name = 'UNKNOWN ({})'.format(gender_value)
        print('SSML Voice Gender: {}'.format(gender_name))

        # Display the natural sample rate hertz for this voice. Example: 24000
        print('Natural Sample Rate Hertz: {}\n'.format(
            voice.natural_sample_rate_hertz))
# [END tts_list_voices]


# Script entry point: print every available voice when run directly.
if __name__ == '__main__':
    list_voices()
23 changes: 23 additions & 0 deletions texttospeech/snippets/list_voices_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright 2018, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import list_voices


def test_list_voices(capsys):
    """Run the list-voices sample and check its captured stdout."""
    list_voices.list_voices()
    printed, _ = capsys.readouterr()

    # Each of these fragments must appear somewhere in the printed listing.
    for expected in ('en-US',
                     'SSML Voice Gender: MALE',
                     'SSML Voice Gender: FEMALE'):
        assert expected in printed
62 changes: 62 additions & 0 deletions texttospeech/snippets/quickstart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python

# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Text-To-Speech API sample application .
Example usage:
python quickstart.py
"""


def run_quickstart():
    # [START tts_quickstart]
    """Synthesizes speech for a fixed greeting and saves it as an MP3.

    Sends the text "Hello, World!" to the Text-to-Speech API, asking for an
    en-US voice with neutral SSML gender and MP3 encoding, then writes the
    returned audio bytes to ``output.mp3`` in the current directory.
    """
    from google.cloud import texttospeech

    # Client used to talk to the Text-to-Speech service
    tts_client = texttospeech.TextToSpeechClient()

    # The text to be spoken
    request_input = texttospeech.types.SynthesisInput(text="Hello, World!")

    # Voice selection: language code "en-US", SSML voice gender "neutral"
    neutral_gender = texttospeech.enums.SsmlVoiceGender.NEUTRAL
    voice_params = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=neutral_gender)

    # Requested audio format of the returned content
    mp3_encoding = texttospeech.enums.AudioEncoding.MP3
    mp3_config = texttospeech.types.AudioConfig(audio_encoding=mp3_encoding)

    # Issue the synthesis request with the input, voice and audio settings
    result = tts_client.synthesize_speech(
        request_input, voice_params, mp3_config)

    # audio_content is binary, so the file is opened in binary mode.
    with open('output.mp3', 'wb') as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
    # [END tts_quickstart]


# Script entry point: run the quickstart synthesis when executed directly.
if __name__ == '__main__':
    run_quickstart()
1 change: 1 addition & 0 deletions texttospeech/snippets/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
google-cloud-texttospeech==0.1.0
1 change: 1 addition & 0 deletions texttospeech/snippets/resources/hello.ssml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<speak>Hello there.</speak>
1 change: 1 addition & 0 deletions texttospeech/snippets/resources/hello.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Hello there!
102 changes: 102 additions & 0 deletions texttospeech/snippets/synthesize_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env python

# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Text-To-Speech API sample application .
Example usage:
python synthesize_file.py --text resources/hello.txt
python synthesize_file.py --ssml resources/hello.ssml
"""

import argparse


# [START tts_synthesize_text_file]
def synthesize_text_file(text_file):
    """Synthesizes speech from the plain text stored in a file.

    Reads the whole of ``text_file``, requests an en-US female voice with
    MP3 encoding, and writes the returned audio to ``output.mp3`` in the
    current directory.

    Args:
        text_file: Path of the file whose contents are synthesized.
    """
    from google.cloud import texttospeech
    tts_client = texttospeech.TextToSpeechClient()

    with open(text_file, 'r') as source:
        contents = source.read()
    request_input = texttospeech.types.SynthesisInput(text=contents)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    female_gender = texttospeech.enums.SsmlVoiceGender.FEMALE
    voice_params = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=female_gender)

    mp3_encoding = texttospeech.enums.AudioEncoding.MP3
    mp3_config = texttospeech.types.AudioConfig(audio_encoding=mp3_encoding)

    result = tts_client.synthesize_speech(
        request_input, voice_params, mp3_config)

    # audio_content is binary, so the file is opened in binary mode.
    with open('output.mp3', 'wb') as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
# [END tts_synthesize_text_file]


# [START tts_synthesize_ssml_file]
def synthesize_ssml_file(ssml_file):
    """Synthesizes speech from the SSML markup stored in a file.

    Reads the whole of ``ssml_file``, requests an en-US female voice with
    MP3 encoding, and writes the returned audio to ``output.mp3`` in the
    current directory.

    Note: ssml must be well-formed according to:
        https://www.w3.org/TR/speech-synthesis/

    Args:
        ssml_file: Path of the file whose SSML contents are synthesized.
    """
    from google.cloud import texttospeech
    tts_client = texttospeech.TextToSpeechClient()

    with open(ssml_file, 'r') as source:
        markup = source.read()
    request_input = texttospeech.types.SynthesisInput(ssml=markup)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    female_gender = texttospeech.enums.SsmlVoiceGender.FEMALE
    voice_params = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=female_gender)

    mp3_encoding = texttospeech.enums.AudioEncoding.MP3
    mp3_config = texttospeech.types.AudioConfig(audio_encoding=mp3_encoding)

    result = tts_client.synthesize_speech(
        request_input, voice_params, mp3_config)

    # audio_content is binary, so the file is opened in binary mode.
    with open('output.mp3', 'wb') as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
# [END tts_synthesize_ssml_file]


# Command-line entry point: exactly one of --text / --ssml must be given,
# each naming a file whose contents are synthesized to output.mp3.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--text',
                       help='The text file from which to synthesize speech.')
    group.add_argument('--ssml',
                       help='The ssml file from which to synthesize speech.')

    args = parser.parse_args()

    # Test against None rather than truthiness: an explicitly supplied but
    # empty --text value must still take the text path instead of falling
    # through to the SSML path with args.ssml == None.
    if args.text is not None:
        synthesize_text_file(args.text)
    else:
        synthesize_ssml_file(args.ssml)
37 changes: 37 additions & 0 deletions texttospeech/snippets/synthesize_file_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2018, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import synthesize_file

TEXT_FILE = 'resources/hello.txt'
SSML_FILE = 'resources/hello.ssml'


def test_synthesize_text_file(capsys):
    """Check that synthesizing the text fixture writes a non-empty MP3."""
    # Remove any output.mp3 left behind by another test or an earlier run,
    # so the size check below can only pass if this call created the file.
    if os.path.exists('output.mp3'):
        os.remove('output.mp3')

    synthesize_file.synthesize_text_file(text_file=TEXT_FILE)
    out, _ = capsys.readouterr()

    assert 'Audio content written to file' in out
    statinfo = os.stat('output.mp3')
    assert statinfo.st_size > 0


def test_synthesize_ssml_file(capsys):
    """Check that synthesizing the SSML fixture writes a non-empty MP3."""
    # Remove any output.mp3 left behind by another test or an earlier run,
    # so the size check below can only pass if this call created the file.
    if os.path.exists('output.mp3'):
        os.remove('output.mp3')

    synthesize_file.synthesize_ssml_file(ssml_file=SSML_FILE)
    out, _ = capsys.readouterr()

    assert 'Audio content written to file' in out
    statinfo = os.stat('output.mp3')
    assert statinfo.st_size > 0
100 changes: 100 additions & 0 deletions texttospeech/snippets/synthesize_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/usr/bin/env python

# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Text-To-Speech API sample application .
Example usage:
python synthesize_text.py --text "hello"
python synthesize_text.py --ssml "<speak>Hello there.</speak>"
"""

import argparse


# [START tts_synthesize_text]
def synthesize_text(text):
    """Synthesizes speech from a plain-text string.

    Requests an en-US female voice with MP3 encoding for ``text`` and
    writes the returned audio to ``output.mp3`` in the current directory.

    Args:
        text: The text to be synthesized.
    """
    from google.cloud import texttospeech
    tts_client = texttospeech.TextToSpeechClient()

    request_input = texttospeech.types.SynthesisInput(text=text)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    female_gender = texttospeech.enums.SsmlVoiceGender.FEMALE
    voice_params = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=female_gender)

    mp3_encoding = texttospeech.enums.AudioEncoding.MP3
    mp3_config = texttospeech.types.AudioConfig(audio_encoding=mp3_encoding)

    result = tts_client.synthesize_speech(
        request_input, voice_params, mp3_config)

    # audio_content is binary, so the file is opened in binary mode.
    with open('output.mp3', 'wb') as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
# [END tts_synthesize_text]


# [START tts_synthesize_ssml]
def synthesize_ssml(ssml):
    """Synthesizes speech from a string of SSML markup.

    Requests an en-US female voice with MP3 encoding for ``ssml`` and
    writes the returned audio to ``output.mp3`` in the current directory.

    Note: ssml must be well-formed according to:
        https://www.w3.org/TR/speech-synthesis/
    Example: <speak>Hello there.</speak>

    Args:
        ssml: The SSML markup to be synthesized.
    """
    from google.cloud import texttospeech
    tts_client = texttospeech.TextToSpeechClient()

    request_input = texttospeech.types.SynthesisInput(ssml=ssml)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    female_gender = texttospeech.enums.SsmlVoiceGender.FEMALE
    voice_params = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=female_gender)

    mp3_encoding = texttospeech.enums.AudioEncoding.MP3
    mp3_config = texttospeech.types.AudioConfig(audio_encoding=mp3_encoding)

    result = tts_client.synthesize_speech(
        request_input, voice_params, mp3_config)

    # audio_content is binary, so the file is opened in binary mode.
    with open('output.mp3', 'wb') as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
# [END tts_synthesize_ssml]


# Command-line entry point: exactly one of --text / --ssml must be given,
# and its string value is synthesized to output.mp3.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--text',
                       help='The text from which to synthesize speech.')
    group.add_argument('--ssml',
                       help='The ssml string from which to synthesize speech.')

    args = parser.parse_args()

    # Test against None rather than truthiness: `--text ""` is a supplied
    # (if empty) text value and must not fall through to synthesize_ssml
    # with args.ssml == None.
    if args.text is not None:
        synthesize_text(args.text)
    else:
        synthesize_ssml(args.ssml)
Loading

0 comments on commit af26bb6

Please sign in to comment.