Skip to content

Commit

Permalink
Merge pull request #2497 from preynal/main
Browse files Browse the repository at this point in the history
feat: ability to select audio input device
  • Loading branch information
paul-gauthier authored Nov 30, 2024
2 parents 37b31c4 + 23825ca commit 82929f6
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 7 deletions.
6 changes: 6 additions & 0 deletions aider/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,12 @@ def get_parser(default_config_files, git_root):
default="en",
help="Specify the language for voice using ISO 639-1 code (default: auto)",
)
group.add_argument(
"--voice-input-device",
metavar="VOICE_INPUT_DEVICE",
default=None,
help="Specify the input device name for voice recording",
)

return parser

Expand Down
2 changes: 1 addition & 1 deletion aider/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ def cmd_voice(self, args):
self.io.tool_error("To use /voice you must provide an OpenAI API key.")
return
try:
self.voice = voice.Voice(audio_format=self.args.voice_format)
self.voice = voice.Voice(audio_format=self.args.voice_format, device_name=self.args.voice_input_device)
except voice.SoundDeviceError:
self.io.tool_error(
"Unable to import `sounddevice` and/or `soundfile`, is portaudio installed?"
Expand Down
27 changes: 24 additions & 3 deletions aider/voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,35 @@ class Voice:

threshold = 0.15

def __init__(self, audio_format="wav"):
def __init__(self, audio_format="wav", device_name=None):
if sf is None:
raise SoundDeviceError
try:
print("Initializing sound device...")
import sounddevice as sd

self.sd = sd


devices = sd.query_devices()

if device_name:
# Find the device with matching name
device_id = None
for i, device in enumerate(devices):
if device_name in device["name"]:
device_id = i
break
if device_id is None:
available_inputs = [d["name"] for d in devices if d["max_input_channels"] > 0]
raise ValueError(f"Device '{device_name}' not found. Available input devices: {available_inputs}")

print(f"Using input device: {device_name} (ID: {device_id})")

self.device_id = device_id
else:
self.device_id = None

except (OSError, ModuleNotFoundError):
raise SoundDeviceError
if audio_format not in ["wav", "mp3", "webm"]:
Expand Down Expand Up @@ -93,7 +114,7 @@ def raw_record_and_transcribe(self, history, language):
temp_wav = tempfile.mktemp(suffix=".wav")

try:
sample_rate = int(self.sd.query_devices(None, "input")["default_samplerate"])
sample_rate = int(self.sd.query_devices(self.device_id, "input")["default_samplerate"])
except (TypeError, ValueError):
sample_rate = 16000 # fallback to 16kHz if unable to query device
except self.sd.PortAudioError:
Expand All @@ -104,7 +125,7 @@ def raw_record_and_transcribe(self, history, language):
self.start_time = time.time()

try:
with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback):
with self.sd.InputStream(samplerate=sample_rate, channels=1, callback=self.callback, device=self.device_id):
prompt(self.get_prompt, refresh_interval=0.1)
except self.sd.PortAudioError as err:
raise SoundDeviceError(f"Error accessing audio input device: {err}")
Expand Down
6 changes: 6 additions & 0 deletions aider/website/assets/sample.aider.conf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,9 @@

## Specify the language for voice using ISO 639-1 code (default: auto)
#voice-language: en

## Specify the language for voice using ISO 639-1 code (default: auto)
#voice-language: en

## Specify the voice input device name used for recording (default: system default)
#voice-input-device: xxx
3 changes: 3 additions & 0 deletions aider/website/assets/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -368,3 +368,6 @@

## Specify the language for voice using ISO 639-1 code (default: auto)
#AIDER_VOICE_LANGUAGE=en

## Specify the voice input device name (default: system default)
#AIDER_VOICE_INPUT_DEVICE="MacBook Pro Microphone"
3 changes: 3 additions & 0 deletions aider/website/docs/config/aider_conf.md
Original file line number Diff line number Diff line change
Expand Up @@ -446,5 +446,8 @@ cog.outl("```")
## Specify the language for voice using ISO 639-1 code (default: auto)
#voice-language: en
## Specify the voice input device name used for recording (default: system default)
#voice-input-device: xxx
```
<!--[[[end]]]-->
5 changes: 3 additions & 2 deletions aider/website/docs/config/dotenv.md
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,8 @@ cog.outl("```")
## Specify the language for voice using ISO 639-1 code (default: auto)
#AIDER_VOICE_LANGUAGE=en
## Specify the voice input device name (default: system default)
#AIDER_VOICE_INPUT_DEVICE="MacBook Pro Microphone"
```
<!--[[[end]]]-->


7 changes: 6 additions & 1 deletion aider/website/docs/config/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ usage: aider [-h] [--openai-api-key] [--anthropic-api-key] [--model]
[--suggest-shell-commands | --no-suggest-shell-commands]
[--fancy-input | --no-fancy-input]
[--detect-urls | --no-detect-urls] [--editor]
[--voice-format] [--voice-language]
[--voice-format] [--voice-language] [--voice-input-device]
```

Expand Down Expand Up @@ -701,4 +701,9 @@ Environment variable: `AIDER_VOICE_FORMAT`
Specify the language for voice using ISO 639-1 code (default: auto)
Default: en
Environment variable: `AIDER_VOICE_LANGUAGE`

### `--voice-input-device VOICE_INPUT_DEVICE`
Specify the voice input device name used for recording (default: system default)
Default: system default
Environment variable: `AIDER_VOICE_INPUT_DEVICE`
<!--[[[end]]]-->

0 comments on commit 82929f6

Please sign in to comment.