Skip to content

Commit 3257184

Browse files
[HOLD] Reintroduce TTS WS
1 parent 5d7b655 commit 3257184

File tree

30 files changed

+2980
-216
lines changed

30 files changed

+2980
-216
lines changed

deepgram/__init__.py

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,12 @@
9696
from .client import (
9797
SpeakOptions,
9898
SpeakRESTOptions,
99-
# SpeakWebSocketOptions,
99+
SpeakWSOptions,
100100
# FileSource,
101101
SpeakRestSource,
102102
SpeakSource,
103103
)
104-
from .client import SpeakWebSocketEvents
104+
from .client import SpeakWebSocketEvents, SpeakWebSocketMessage
105105

106106
## speak REST
107107
from .client import (
@@ -115,21 +115,22 @@
115115
SpeakRESTResponse,
116116
)
117117

118-
# ## speak WebSocket
119-
# from .client import (
120-
# SpeakWebSocketClient,
121-
# AsyncSpeakWebSocketClient,
122-
# )
123-
# from .client import (
124-
# SpeakWebSocketResponse,
125-
# # OpenResponse,
126-
# # MetadataResponse,
127-
# FlushedResponse,
128-
# # CloseResponse,
129-
# # UnhandledResponse,
130-
# WarningResponse,
131-
# # ErrorResponse,
132-
# )
118+
## speak WebSocket
119+
from .client import (
120+
SpeakWebSocketClient,
121+
AsyncSpeakWebSocketClient,
122+
SpeakWSClient,
123+
AsyncSpeakWSClient,
124+
)
125+
from .client import (
126+
# OpenResponse,
127+
# MetadataResponse,
128+
FlushedResponse,
129+
# CloseResponse,
130+
# UnhandledResponse,
131+
WarningResponse,
132+
# ErrorResponse,
133+
)
133134

134135
# manage
135136
from .client import ManageClient, AsyncManageClient
@@ -173,10 +174,26 @@
173174
)
174175

175176
# utilities
177+
# pylint: disable=wrong-import-position
176178
from .audio import Microphone
177179
from .audio import (
178-
LOGGING,
179-
CHANNELS,
180-
RATE,
181-
CHUNK,
180+
INPUT_LOGGING,
181+
INPUT_CHANNELS,
182+
INPUT_RATE,
183+
INPUT_CHUNK,
182184
)
185+
186+
LOGGING = INPUT_LOGGING
187+
CHANNELS = INPUT_CHANNELS
188+
RATE = INPUT_RATE
189+
CHUNK = INPUT_CHUNK
190+
191+
from .audio import Speaker
192+
from .audio import (
193+
OUTPUT_LOGGING,
194+
OUTPUT_CHANNELS,
195+
OUTPUT_RATE,
196+
OUTPUT_CHUNK,
197+
)
198+
199+
# pylint: enable=wrong-import-position

deepgram/audio/__init__.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,17 @@
33
# SPDX-License-Identifier: MIT
44

55
from .microphone import Microphone
6-
from .microphone import LOGGING, CHANNELS, RATE, CHUNK
6+
from .microphone import (
7+
LOGGING as INPUT_LOGGING,
8+
CHANNELS as INPUT_CHANNELS,
9+
RATE as INPUT_RATE,
10+
CHUNK as INPUT_CHUNK,
11+
)
12+
13+
from .speaker import Speaker
14+
from .speaker import (
15+
LOGGING as OUTPUT_LOGGING,
16+
CHANNELS as OUTPUT_CHANNELS,
17+
RATE as OUTPUT_RATE,
18+
CHUNK as OUTPUT_CHUNK,
19+
)

deepgram/audio/microphone/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from ...utils import verboselogs
66

77
# Constants for microphone
8-
98
LOGGING = verboselogs.WARNING
109
CHANNELS = 1
1110
RATE = 16000

deepgram/audio/microphone/microphone.py

Lines changed: 60 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import inspect
66
import asyncio
77
import threading
8-
from typing import Optional, Callable, TYPE_CHECKING
8+
from typing import Optional, Callable, Union, TYPE_CHECKING
99
import logging
1010

1111
from ...utils import verboselogs
@@ -21,10 +21,10 @@ class Microphone: # pylint: disable=too-many-instance-attributes
2121
"""
2222

2323
_logger: verboselogs.VerboseLogger
24-
_exit: threading.Event
2524

2625
_audio: "pyaudio.PyAudio"
2726
_stream: "pyaudio.Stream"
27+
2828
_chunk: int
2929
_rate: int
3030
_format: int
@@ -34,9 +34,10 @@ class Microphone: # pylint: disable=too-many-instance-attributes
3434

3535
_asyncio_loop: asyncio.AbstractEventLoop
3636
_asyncio_thread: threading.Thread
37+
_exit: threading.Event
3738

38-
_push_callback_org: object
39-
_push_callback: object
39+
_push_callback_org: Optional[Callable] = None
40+
_push_callback: Optional[Callable] = None
4041

4142
def __init__(
4243
self,
@@ -53,6 +54,7 @@ def __init__(
5354
self._logger = verboselogs.VerboseLogger(__name__)
5455
self._logger.addHandler(logging.StreamHandler())
5556
self._logger.setLevel(verbose)
57+
5658
self._exit = threading.Event()
5759

5860
self._audio = pyaudio.PyAudio()
@@ -71,9 +73,16 @@ def _start_asyncio_loop(self) -> None:
7173

7274
def is_active(self) -> bool:
7375
"""
74-
returns True if the stream is active, False otherwise
76+
is_active - returns the state of the stream
77+
78+
Args:
79+
None
80+
81+
Returns:
82+
True if the stream is active, False otherwise
7583
"""
7684
self._logger.debug("Microphone.is_active ENTER")
85+
7786
if self._stream is None:
7887
self._logger.error("stream is None")
7988
self._logger.debug("Microphone.is_active LEAVE")
@@ -87,24 +96,34 @@ def is_active(self) -> bool:
8796

8897
def set_callback(self, push_callback: Callable) -> None:
8998
"""
90-
Set the callback function to be called when data is received.
99+
set_callback - sets the callback function to be called when data is received.
100+
101+
Args:
102+
push_callback (Callable): The callback function to be called when data is received.
103+
This should be the websocket send function.
104+
105+
Returns:
106+
None
91107
"""
92108
self._push_callback_org = push_callback
93109

94110
def start(self) -> bool:
95111
"""
96-
starts the microphone stream
112+
start - starts the microphone stream
113+
114+
Returns:
115+
bool: True if the stream was started, False otherwise
97116
"""
98117
self._logger.debug("Microphone.start ENTER")
99118

100119
self._logger.info("format: %s", self._format)
101120
self._logger.info("channels: %d", self._channels)
102121
self._logger.info("rate: %d", self._rate)
103122
self._logger.info("chunk: %d", self._chunk)
104-
self._logger.info("input_device_id: %d", self._input_device_index)
123+
# self._logger.info("input_device_id: %d", self._input_device_index)
105124

106125
if self._push_callback_org is None:
107-
self._logger.error("start() failed. No callback set.")
126+
self._logger.error("start failed. No callback set.")
108127
self._logger.debug("Microphone.start LEAVE")
109128
return False
110129

@@ -114,9 +133,13 @@ def start(self) -> bool:
114133
self._asyncio_thread = threading.Thread(target=self._start_asyncio_loop)
115134
self._asyncio_thread.start()
116135

117-
self._push_callback = lambda data: asyncio.run_coroutine_threadsafe(
118-
self._push_callback_org(data), self._asyncio_loop
119-
).result()
136+
self._push_callback = lambda data: (
137+
asyncio.run_coroutine_threadsafe(
138+
self._push_callback_org(data), self._asyncio_loop
139+
).result()
140+
if self._push_callback_org
141+
else None
142+
)
120143
else:
121144
self._logger.verbose("regular threaded callback")
122145
self._push_callback = self._push_callback_org
@@ -134,7 +157,7 @@ def start(self) -> bool:
134157
self._exit.clear()
135158
self._stream.start_stream()
136159

137-
self._logger.notice("start() succeeded")
160+
self._logger.notice("start succeeded")
138161
self._logger.debug("Microphone.start LEAVE")
139162
return True
140163

@@ -176,41 +199,50 @@ def _callback(
176199

177200
def mute(self) -> bool:
178201
"""
179-
Mutes the microphone stream
202+
mute - mutes the microphone stream
203+
204+
Returns:
205+
bool: True if the stream was muted, False otherwise
180206
"""
181207
self._logger.debug("Microphone.mute ENTER")
182208

183209
if self._stream is None:
184-
self._logger.error("mute() failed. Library not initialized.")
210+
self._logger.error("mute failed. Library not initialized.")
185211
self._logger.debug("Microphone.mute LEAVE")
186212
return False
187213

188214
self._is_muted = True
189215

190-
self._logger.notice("mute() succeeded")
216+
self._logger.notice("mute succeeded")
191217
self._logger.debug("Microphone.mute LEAVE")
192218
return True
193219

194220
def unmute(self) -> bool:
195221
"""
196-
Unmutes the microphone stream
222+
unmute - unmutes the microphone stream
223+
224+
Returns:
225+
bool: True if the stream was unmuted, False otherwise
197226
"""
198227
self._logger.debug("Microphone.unmute ENTER")
199228

200229
if self._stream is None:
201-
self._logger.error("unmute() failed. Library not initialized.")
230+
self._logger.error("unmute failed. Library not initialized.")
202231
self._logger.debug("Microphone.unmute LEAVE")
203232
return False
204233

205234
self._is_muted = False
206235

207-
self._logger.notice("unmute() succeeded")
236+
self._logger.notice("unmute succeeded")
208237
self._logger.debug("Microphone.unmute LEAVE")
209238
return True
210239

211240
def finish(self) -> bool:
212241
"""
213-
Stops the microphone stream
242+
finish - stops the microphone stream
243+
244+
Returns:
245+
bool: True if the stream was stopped, False otherwise
214246
"""
215247
self._logger.debug("Microphone.finish ENTER")
216248

@@ -219,19 +251,24 @@ def finish(self) -> bool:
219251

220252
# Stop the stream.
221253
if self._stream is not None:
254+
self._logger.notice("stopping stream...")
222255
self._stream.stop_stream()
223256
self._stream.close()
224257
self._stream = None # type: ignore
258+
self._logger.notice("stream stopped")
225259

226260
# clean up the thread
227261
if (
228-
inspect.iscoroutinefunction(self._push_callback_org)
229-
and self._asyncio_thread is not None
262+
# inspect.iscoroutinefunction(self._push_callback_org)
263+
# and
264+
self._asyncio_thread
265+
is not None
230266
):
267+
self._logger.notice("stopping asyncio loop...")
231268
self._asyncio_loop.call_soon_threadsafe(self._asyncio_loop.stop)
232269
self._asyncio_thread.join()
233270
self._asyncio_thread = None # type: ignore
234-
self._logger.notice("stream/recv thread joined")
271+
self._logger.notice("_asyncio_thread joined")
235272

236273
self._logger.notice("finish succeeded")
237274
self._logger.debug("Microphone.finish LEAVE")

deepgram/audio/speaker/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
2+
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
3+
# SPDX-License-Identifier: MIT
4+
5+
from .speaker import Speaker
6+
from .constants import LOGGING, CHANNELS, RATE, CHUNK
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
2+
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
3+
# SPDX-License-Identifier: MIT
4+
5+
from ...utils import verboselogs
6+
7+
# Constants for speaker
8+
LOGGING = verboselogs.WARNING
9+
TIMEOUT = 0.050
10+
CHANNELS = 1
11+
RATE = 48000
12+
CHUNK = 8194

deepgram/audio/speaker/errors.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
2+
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
3+
# SPDX-License-Identifier: MIT
4+
5+
6+
# exceptions for speaker
7+
class DeepgramSpeakerError(Exception):
8+
"""
9+
Exception raised for known errors related to the Speaker library.
10+
11+
Attributes:
12+
message (str): The error message describing the exception.
13+
"""
14+
15+
def __init__(self, message: str):
16+
super().__init__(message)
17+
self.name = "DeepgramSpeakerError"
18+
self.message = message
19+
20+
def __str__(self):
21+
return f"{self.name}: {self.message}"

0 commit comments

Comments
 (0)