diff --git a/aider/args.py b/aider/args.py index 754ef23ed0a..0627b1a66e1 100644 --- a/aider/args.py +++ b/aider/args.py @@ -770,6 +770,12 @@ def get_parser(default_config_files, git_root): default="en", help="Specify the language for voice using ISO 639-1 code (default: auto)", ) + group.add_argument( + "--voice-input-device", + metavar="VOICE_INPUT_DEVICE", + default=None, + help="Specify the input device name for voice recording", + ) return parser diff --git a/aider/commands.py b/aider/commands.py index fea6a208d91..fdb6ea4b84a 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -1085,7 +1085,7 @@ def cmd_voice(self, args): self.io.tool_error("To use /voice you must provide an OpenAI API key.") return try: - self.voice = voice.Voice(audio_format=self.args.voice_format) + self.voice = voice.Voice(audio_format=self.args.voice_format, device_name=self.args.voice_input_device) except voice.SoundDeviceError: self.io.tool_error( "Unable to import `sounddevice` and/or `soundfile`, is portaudio installed?" diff --git a/aider/voice.py b/aider/voice.py index 47fb49c6e79..1e9f700b600 100644 --- a/aider/voice.py +++ b/aider/voice.py @@ -34,7 +34,7 @@ class Voice: threshold = 0.15 - def __init__(self, audio_format="wav"): + def __init__(self, audio_format="wav", device_name=None): if sf is None: raise SoundDeviceError try: @@ -42,6 +42,27 @@ def __init__(self, audio_format="wav"): import sounddevice as sd self.sd = sd + + + devices = sd.query_devices() + + if device_name: + # Find the device with matching name + device_id = None + for i, device in enumerate(devices): + if device_name in device["name"]: + device_id = i + break + if device_id is None: + available_inputs = [d["name"] for d in devices if d["max_input_channels"] > 0] + raise ValueError(f"Device '{device_name}' not found. 
Available input devices: {available_inputs}") + + print(f"Using input device: {device_name} (ID: {device_id})") + + self.device_id = device_id + else: + self.device_id = None + except (OSError, ModuleNotFoundError): raise SoundDeviceError if audio_format not in ["wav", "mp3", "webm"]: @@ -93,7 +114,7 @@ def raw_record_and_transcribe(self, history, language): temp_wav = tempfile.mktemp(suffix=".wav") try: - sample_rate = int(self.sd.query_devices(None, "input")["default_samplerate"]) + sample_rate = int(self.sd.query_devices(self.device_id, "input")["default_samplerate"]) except (TypeError, ValueError): sample_rate = 16000 # fallback to 16kHz if unable to query device except self.sd.PortAudioError: @@ -104,7 +125,7 @@ def raw_record_and_transcribe(self, history, language): self.start_time = time.time() try: - with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback): + with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback, device=self.device_id): prompt(self.get_prompt, refresh_interval=0.1) except self.sd.PortAudioError as err: raise SoundDeviceError(f"Error accessing audio input device: {err}") diff --git a/aider/website/assets/sample.aider.conf.yml b/aider/website/assets/sample.aider.conf.yml index 9baeb5c32cc..811e8dc02c4 100644 --- a/aider/website/assets/sample.aider.conf.yml +++ b/aider/website/assets/sample.aider.conf.yml @@ -390,3 +390,9 @@ ## Specify the language for voice using ISO 639-1 code (default: auto) #voice-language: en + +## Specify the language for voice using ISO 639-1 code (default: auto) +#voice-language: en + +##Specify the voice input device name used for recording (default: system default) +#voice-input-device: xxx diff --git a/aider/website/assets/sample.env b/aider/website/assets/sample.env index 6076934b04b..d0452656494 100644 --- a/aider/website/assets/sample.env +++ b/aider/website/assets/sample.env @@ -368,3 +368,6 @@ ## Specify the language for voice using ISO 639-1 code 
(default: auto) #AIDER_VOICE_LANGUAGE=en + +## Specify the voice input device name (default: system default) +#AIDER_VOICE_INPUT_DEVICE="MacBook Pro Microphone" diff --git a/aider/website/docs/config/aider_conf.md b/aider/website/docs/config/aider_conf.md index f9a50404fd5..ddc1aa986e2 100644 --- a/aider/website/docs/config/aider_conf.md +++ b/aider/website/docs/config/aider_conf.md @@ -446,5 +446,8 @@ cog.outl("```") ## Specify the language for voice using ISO 639-1 code (default: auto) #voice-language: en + +##Specify the voice input device name used for recording (default: system default) +#voice-input-device: xxx ``` diff --git a/aider/website/docs/config/dotenv.md b/aider/website/docs/config/dotenv.md index fbd020840e0..1d1d2288d53 100644 --- a/aider/website/docs/config/dotenv.md +++ b/aider/website/docs/config/dotenv.md @@ -410,7 +410,8 @@ cog.outl("```") ## Specify the language for voice using ISO 639-1 code (default: auto) #AIDER_VOICE_LANGUAGE=en + +## Specify the voice input device name (default: system default) +#AIDER_VOICE_INPUT_DEVICE="MacBook Pro Microphone" ``` - - diff --git a/aider/website/docs/config/options.md b/aider/website/docs/config/options.md index db7058c85cc..05377a3de88 100644 --- a/aider/website/docs/config/options.md +++ b/aider/website/docs/config/options.md @@ -75,7 +75,7 @@ usage: aider [-h] [--openai-api-key] [--anthropic-api-key] [--model] [--suggest-shell-commands | --no-suggest-shell-commands] [--fancy-input | --no-fancy-input] [--detect-urls | --no-detect-urls] [--editor] - [--voice-format] [--voice-language] + [--voice-format] [--voice-language] [--voice-input-device] ``` @@ -701,4 +701,9 @@ Environment variable: `AIDER_VOICE_FORMAT` Specify the language for voice using ISO 639-1 code (default: auto) Default: en Environment variable: `AIDER_VOICE_LANGUAGE` + +### `--voice-input-device VOICE_INPUT_DEVICE` +Specify the voice input device name used for recording (default: system default) +Default: system default +Environment 
variable: `AIDER_VOICE_INPUT_DEVICE`