diff --git a/software/poetry.lock b/software/poetry.lock index bf8f4a9..23b0bb1 100644 --- a/software/poetry.lock +++ b/software/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aifs" @@ -7907,6 +7907,26 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "sounddevice" +version = "0.4.6" +description = "Play and Record Sound with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sounddevice-0.4.6-py3-none-any.whl", hash = "sha256:5de768ba6fe56ad2b5aaa2eea794b76b73e427961c95acad2ee2ed7f866a4b20"}, + {file = "sounddevice-0.4.6-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:8b0b806c205dd3e3cd5a97262b2482624fd21db7d47083b887090148a08051c8"}, + {file = "sounddevice-0.4.6-py3-none-win32.whl", hash = "sha256:e3ba6e674ffa8f79a591d744a1d4ab922fe5bdfd4faf8b25069a08e051010b7b"}, + {file = "sounddevice-0.4.6-py3-none-win_amd64.whl", hash = "sha256:7830d4f8f8570f2e5552942f81d96999c5fcd9a0b682d6fc5d5c5529df23be2c"}, + {file = "sounddevice-0.4.6.tar.gz", hash = "sha256:3236b78f15f0415bdf006a620cef073d0c0522851d66f4a961ed6d8eb1482fe9"}, +] + +[package.dependencies] +CFFI = ">=1.0" + +[package.extras] +numpy = ["NumPy"] + [[package]] name = "soupsieve" version = "2.5" @@ -9252,4 +9272,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "8f22d228c8b3c85a5b92fd98eb51042ab214f32634ff024fc036e78b3535107d" +content-hash = "0e53c86bb6d02864d2af0326f43c30b1d98037c7dcc025ce5b72d894ca15bc63" diff --git a/software/pyproject.toml b/software/pyproject.toml index 6d331ea..d146743 100644 --- a/software/pyproject.toml +++ b/software/pyproject.toml @@ -34,6 +34,7 @@ pytimeparse = "^1.1.8" python-crontab = "^3.0.0" inquirer = "^3.2.4" pyqrcode = "^1.2.1" +sounddevice = "^0.4.6" [build-system] requires = ["poetry-core"] diff --git a/software/start.py b/software/start.py index 9b8b627..d19b622 100644 --- a/software/start.py +++ b/software/start.py @@ -8,6 +8,7 @@ from source.server.tunnel import create_tunnel from source.server.server import main from source.server.utils.local_mode import select_local_model +import sounddevice import signal app = typer.Typer() @@ -17,32 +18,32 @@ def run( server: bool = typer.Option(False, "--server", help="Run server"), server_host: str = typer.Option("0.0.0.0", "--server-host", help="Specify the server host where the server will deploy"), server_port: int = typer.Option(10001, "--server-port", help="Specify the server port where the server will deploy"), - + tunnel_service: str = typer.Option("ngrok", "--tunnel-service", help="Specify the tunnel service"), expose: bool = typer.Option(False, "--expose", help="Expose server to internet"), - + client: bool = typer.Option(False, "--client", help="Run client"), server_url: str = typer.Option(None, "--server-url", help="Specify the server URL that the client should expect. Defaults to server-host and server-port"), client_type: str = typer.Option("auto", "--client-type", help="Specify the client type"), - + llm_service: str = typer.Option("litellm", "--llm-service", help="Specify the LLM service"), - + model: str = typer.Option("gpt-4", "--model", help="Specify the model"), llm_supports_vision: bool = typer.Option(False, "--llm-supports-vision", help="Specify if the LLM service supports vision"), llm_supports_functions: bool = typer.Option(False, "--llm-supports-functions", help="Specify if the LLM service supports functions"), context_window: int = typer.Option(2048, "--context-window", help="Specify the context window size"), max_tokens: int = typer.Option(4096, "--max-tokens", help="Specify the maximum number of tokens"), temperature: float = typer.Option(0.8, "--temperature", help="Specify the temperature for generation"), - + tts_service: str = typer.Option("openai", "--tts-service", help="Specify the TTS service"), - + stt_service: str = typer.Option("openai", "--stt-service", help="Specify the STT service"), local: bool = typer.Option(False, "--local", help="Use recommended local services for LLM, STT, and TTS"), - + qr: bool = typer.Option(False, "--qr", help="Print the QR code for the server URL") ): - + _run( server=server, server_host=server_host, @@ -69,41 +70,41 @@ def _run( server: bool = False, server_host: str = "0.0.0.0", server_port: int = 10001, - + tunnel_service: str = "bore", expose: bool = False, - + client: bool = False, server_url: str = None, client_type: str = "auto", - + llm_service: str = "litellm", - + model: str = "gpt-4", llm_supports_vision: bool = False, llm_supports_functions: bool = False, context_window: int = 2048, max_tokens: int = 4096, temperature: float = 0.8, - + tts_service: str = "openai", - + stt_service: str = "openai", local: bool = False, - + qr: bool = False ): - + if local: tts_service = "piper" # llm_service = "llamafile" stt_service = "local-whisper" select_local_model() - + if not server_url: server_url = f"{server_host}:{server_port}" - + if not server and not client: server = True client = True @@ -152,4 +153,4 @@ def handle_exit(signum, frame): if client: client_thread.join() except KeyboardInterrupt: - os.kill(os.getpid(), signal.SIGINT) \ No newline at end of file + os.kill(os.getpid(), signal.SIGINT)