Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
8b7d0ef
feat(cli): add native local-first voice input via whisper
fayerman-source Feb 1, 2026
0507561
fix(voice): Resolve code review issues for voice input feature
fayerman-source Feb 4, 2026
a5b1345
fix(voice): Update status text to show correct Alt+R key binding
fayerman-source Feb 5, 2026
07ae122
fix(voice): implement event-based architecture to eliminate infinite …
fayerman-source Feb 6, 2026
c1eb102
test(voice): add comprehensive test suite for voice input feature
fayerman-source Feb 6, 2026
7fe8ce8
fix(voice): enable auto language detection for multilingual support
fayerman-source Feb 6, 2026
6899ba4
fix: race condition protection, transcribing indicator, and voice hel…
fayerman-source Feb 7, 2026
e229bf3
Merge upstream main with shortcuts help feature and voice input
fayerman-source Feb 7, 2026
ce4b23c
security: fix command injection vulnerability in voice input
fayerman-source Feb 7, 2026
c6a9409
fix: resolve merge conflicts with upstream main
fayerman-source Feb 10, 2026
5881c3e
fix: resolve merge conflicts with upstream main
fayerman-source Feb 12, 2026
c6c7329
fix: resolve merge conflicts with upstream main
fayerman-source Feb 14, 2026
f9a71af
feat(voice): enhance voice input with static config and improved stab…
fayerman-source Feb 14, 2026
6a9196d
chore: merge latest changes from main
fayerman-source Feb 14, 2026
ede0c6a
fix(voice): resolve resource leak and portability issues
fayerman-source Feb 14, 2026
0777cc0
fix(cli): resolve spurious startup warnings and false-positive infini…
fayerman-source Feb 14, 2026
b86b1a1
Merge branch 'main' into feat/voice-input-clean
fayerman-source Feb 16, 2026
9b97b2e
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 19, 2026
56fa659
Merge remote-tracking branch 'upstream/main' into feat/voice-input-…
fayerman-source Feb 19, 2026
0eeeea4
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 19, 2026
c26c427
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 19, 2026
f531123
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 19, 2026
acea2bb
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 19, 2026
70d9507
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 20, 2026
d65996a
Merge branch 'main' into feat/voice-input-clean
fayerman-source Feb 20, 2026
b82d070
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 20, 2026
f21adc7
Merge branch 'feat/voice-input-clean' of https://github.com/fayerman-…
fayerman-source Feb 20, 2026
8a10207
Merge branch 'main' into feat/voice-input-clean
fayerman-source Feb 20, 2026
07d633a
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 22, 2026
a45968f
Merge branch 'feat/voice-input-clean' of https://github.com/fayerman-…
fayerman-source Feb 22, 2026
caea370
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 22, 2026
db85b26
Merge branch 'main' into feat/voice-input-clean
fayerman-source Feb 22, 2026
51e55fa
Merge branch 'main' into feat/voice-input-clean
fayerman-source Feb 23, 2026
b2f5d00
Merge branch 'main' into feat/voice-input-clean
fayerman-source Feb 23, 2026
f44fabc
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 23, 2026
3ead9fe
Merge branch 'feat/voice-input-clean' of https://github.com/fayerman-…
fayerman-source Feb 23, 2026
e840625
fix(voice): remove stale reference to deleted VOICE_INFINITE_LOOP_ANA…
fayerman-source Feb 23, 2026
690859d
feat(voice): add voice.enabled setting to toggle voice input
fayerman-source Feb 24, 2026
e8e67e3
feat(voice): pluggable backend with zero-install Gemini transcription
fayerman-source Feb 25, 2026
0f00455
refactor(voice): move backends to packages/core, use coreEvents for t…
fayerman-source Feb 25, 2026
5bfaeae
fix(voice): resolve TypeScript compile errors in voice backends
fayerman-source Feb 25, 2026
7fccefb
fix(voice): fix AppContainer voiceConfig type for TypeScript strict n…
fayerman-source Feb 25, 2026
c569b7a
fix(voice): address three UX bugs found in manual testing
fayerman-source Feb 25, 2026
668530f
refactor(voice): make /voice a pure settings command; fix transcripti…
fayerman-source Feb 25, 2026
b749764
fix(voice): force Ink render before Gemini API call to show transcrib…
fayerman-source Feb 25, 2026
eeaf709
fix(voice): remove duplicate emoji from voice state placeholders
fayerman-source Feb 25, 2026
7fabd02
feat(voice): add configurable silence threshold for whisper-level sen…
fayerman-source Feb 25, 2026
dda2251
feat(voice): add subcommand autocomplete hints to /voice
fayerman-source Feb 25, 2026
9ae2e64
docs(voice): document all voice settings in settings.md and configura…
fayerman-source Feb 25, 2026
9267ef5
chore: merge upstream/main, resolve keyMatchers.test.ts conflict
fayerman-source Feb 25, 2026
24c91fc
fix(voice): address PR review comments from jacob314
fayerman-source Feb 25, 2026
7d331cf
test(voice): add cancelRecording tests and cancel() to backend mocks
fayerman-source Feb 26, 2026
6e616e9
fix(voice): disable shortcut triggers when voice.enabled is false
fayerman-source Feb 26, 2026
43317a2
fix(voice): suppress key handler and debug noise when voice is disabled
fayerman-source Feb 26, 2026
d7cecf7
fix(cli): support quota error fallbacks for all authentication types …
sehoon38 Feb 26, 2026
4eabeba
feat(plan): adapt planning workflow based on complexity of task (#20322)
jerop Feb 26, 2026
694e464
feat(voice): style /voice status output with themed VoiceStatus compo…
fayerman-source Feb 26, 2026
5c1552e
Merge remote-tracking branch 'upstream/main' into feat/voice-input-clean
fayerman-source Feb 26, 2026
74fde52
fix(voice): improve error message when sox and arecord are both missing
fayerman-source Feb 26, 2026
32732ee
Merge branch 'main' into feat/voice-input-clean
fayerman-source Feb 26, 2026
2e4231b
feat(voice): polish recording UI with animated spinner and improved s…
fayerman-source Feb 26, 2026
7a57af6
Merge branch 'main' into feat/voice-input-clean
fayerman-source Feb 26, 2026
963d7ff
fix(voice): regenerate settings schema and simplify test mock
fayerman-source Feb 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions docs/cli/plan-mode.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,28 @@ manually during a session.

### Planning Workflow

Plan Mode uses an adaptive planning workflow where the research depth, plan
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please revert changes to unrelated files such as plan-mode.md

structure, and consultation level are proportional to the task's complexity:

1. **Explore & Analyze:** Analyze requirements and use read-only tools to map
the codebase and validate assumptions. For complex tasks, identify at least
two viable implementation approaches.
2. **Consult:** Present a summary of the identified approaches via [`ask_user`]
to obtain a selection. For simple or canonical tasks, this step may be
skipped.
3. **Draft:** Once an approach is selected, write a detailed implementation
plan to the plans directory.
affected modules and identify dependencies.
2. **Consult:** The depth of consultation is proportional to the task's
complexity:
- **Simple Tasks:** Proceed directly to drafting.
- **Standard Tasks:** Present a summary of viable approaches via
[`ask_user`] for selection.
- **Complex Tasks:** Present detailed trade-offs for at least two viable
approaches via [`ask_user`] and obtain approval before drafting.
3. **Draft:** Write a detailed implementation plan to the
[plans directory](#custom-plan-directory-and-policies). The plan's structure
adapts to the task:
- **Simple Tasks:** Focused on specific **Changes** and **Verification**
steps.
- **Standard Tasks:** Includes an **Objective**, **Key Files & Context**,
**Implementation Steps**, and **Verification & Testing**.
- **Complex Tasks:** Comprehensive plans including **Background &
Motivation**, **Scope & Impact**, **Proposed Solution**, **Alternatives
Considered**, and **Migration & Rollback** strategies.
4. **Review & Approval:** Use the [`exit_plan_mode`] tool to present the plan
and formally request approval.
- **Approve:** Exit Plan Mode and start implementation.
Expand Down
9 changes: 9 additions & 0 deletions docs/cli/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ they appear in the UI.
| Loading Phrases | `ui.loadingPhrases` | What to show while the model is working: tips, witty comments, both, or nothing. | `"tips"` |
| Screen Reader Mode | `ui.accessibility.screenReader` | Render output in plain-text to be more screen reader accessible | `false` |

### Voice

| UI Label | Setting | Description | Default |
| --------------------- | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- |
| Voice Input | `voice.enabled` | Enable voice input. When enabled, press **Alt+R** or **Ctrl+Q** to start/stop recording. | `false` |
| Transcription Backend | `voice.provider` | Transcription backend to use: `gemini` (zero-install, uses existing Gemini API auth) or `whisper` (local binary). | `"gemini"` |
| Silence Detection | `voice.silenceThreshold` | RMS energy threshold (0–1000) below which audio is discarded as silence. Lower values capture quieter speech (e.g. whispering). `0` disables silence detection. | `80` |
| Whisper Binary Path | `voice.whisperPath` | Path to the Whisper executable. Only used when `voice.provider` is `"whisper"` (e.g. `/usr/local/bin/whisper`). | `undefined` |

### IDE

| UI Label | Setting | Description | Default |
Expand Down
27 changes: 27 additions & 0 deletions docs/reference/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,33 @@ their corresponding top-level category object in your `settings.json` file.
- **Default:** `false`
- **Requires restart:** Yes

#### `voice`

- **`voice.enabled`** (boolean):
- **Description:** Enable voice input. When enabled, press **Alt+R** or
**Ctrl+Q** to start/stop recording. Use `/voice enable` or `/voice disable`
to toggle.
- **Default:** `false`

- **`voice.provider`** (string: `"gemini"` | `"whisper"`):
- **Description:** Transcription backend. `gemini` uses the Gemini API with
your existing auth (zero additional setup). `whisper` uses a locally
installed Whisper binary for offline/faster transcription.
- **Default:** `"gemini"`

- **`voice.silenceThreshold`** (number, 0–1000):
- **Description:** RMS energy threshold for silence detection. Audio below
this level is discarded without an API call. Lower values capture quieter
speech (e.g. whispering). Set to `0` to disable silence detection and always
transcribe. Use `/voice sensitivity <value>` to adjust at runtime.
- **Default:** `80`

- **`voice.whisperPath`** (string):
- **Description:** Path to the Whisper executable. Only used when
`voice.provider` is `"whisper"` (e.g. `/usr/local/bin/whisper` or
`~/.local/bin/whisper`). Use `/voice set-path <path>` to set at runtime.
- **Default:** `undefined`

#### `ide`

- **`ide.enabled`** (boolean):
Expand Down
6 changes: 6 additions & 0 deletions docs/reference/keyboard-shortcuts.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ available combinations.
| Open the current prompt in an external editor. | `Ctrl + X` |
| Paste from the clipboard. | `Ctrl + V`<br />`Cmd + V`<br />`Alt + V` |

#### Voice Input

| Action | Keys |
| ----------------------------------------------- | ------------------------- |
| Toggle voice input recording (Alt+R or Ctrl+Q). | `Alt + R`<br />`Ctrl + Q` |

#### App Controls

| Action | Keys |
Expand Down
16 changes: 16 additions & 0 deletions packages/cli/src/config/keyBindings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ export enum Command {
CLEAR_SCREEN = 'app.clearScreen',
RESTART_APP = 'app.restart',
SUSPEND_APP = 'app.suspend',

// Voice Input
VOICE_INPUT = 'input.voice',
}

/**
Expand Down Expand Up @@ -297,6 +300,12 @@ export const defaultKeyBindings: KeyBindingConfig = {
[Command.CLEAR_SCREEN]: [{ key: 'l', ctrl: true }],
[Command.RESTART_APP]: [{ key: 'r' }],
[Command.SUSPEND_APP]: [{ key: 'z', ctrl: true }],

// Voice Input
[Command.VOICE_INPUT]: [
{ key: 'r', alt: true }, // Alt+R
{ key: 'q', ctrl: true }, // Ctrl+Q
],
};

interface CommandCategory {
Expand Down Expand Up @@ -391,6 +400,10 @@ export const commandCategories: readonly CommandCategory[] = [
Command.PASTE_CLIPBOARD,
],
},
{
title: 'Voice Input',
commands: [Command.VOICE_INPUT],
},
{
title: 'App Controls',
commands: [
Expand Down Expand Up @@ -525,4 +538,7 @@ export const commandDescriptions: Readonly<Record<Command, string>> = {
[Command.CLEAR_SCREEN]: 'Clear the terminal screen and redraw the UI.',
[Command.RESTART_APP]: 'Restart the application.',
[Command.SUSPEND_APP]: 'Suspend the CLI and move it to the background.',

// Voice Input
[Command.VOICE_INPUT]: 'Toggle voice input recording (Alt+R or Ctrl+Q).',
};
51 changes: 51 additions & 0 deletions packages/cli/src/config/settingsSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,57 @@ const SETTINGS_SCHEMA = {
},
},

voice: {
type: 'object',
label: 'Voice Input',
category: 'General',
requiresRestart: false,
default: {},
description: 'Settings for voice input.',
properties: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to add showInDialog: false,
Otherwise there is a distracting entry for Voice Input with type Object

enabled: {
type: 'boolean',
label: 'Enable Voice Input',
category: 'General',
requiresRestart: false,
default: false,
description: 'Enable voice input support.',
showInDialog: true,
},
provider: {
type: 'string',
label: 'Transcription Backend',
category: 'General',
requiresRestart: false,
default: undefined as string | undefined,
description:
'Transcription backend: "gemini" (default, zero-install) or "whisper" (local).',
showInDialog: true,
},
whisperPath: {
type: 'string',
label: 'Whisper Binary Path',
category: 'General',
requiresRestart: false,
default: undefined as string | undefined,
description:
'Path to the whisper executable. Only used when provider is "whisper".',
showInDialog: true,
},
silenceThreshold: {
type: 'number',
label: 'Silence Detection Threshold',
category: 'General',
requiresRestart: false,
default: 80,
description:
'RMS energy threshold (0–1000) below which audio is discarded as silence. ' +
'Lower values allow quieter speech such as whispering. 0 disables silence detection.',
showInDialog: true,
},
},
},

ide: {
type: 'object',
label: 'IDE',
Expand Down
5 changes: 3 additions & 2 deletions packages/cli/src/nonInteractiveCli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,12 @@ export async function runNonInteractive({
settings,
);
// If a slash command is found and returns a prompt, use it.
// Otherwise, slashCommandResult falls through to the default prompt
// handling.
// Otherwise, if it was a slash command, we are done.
if (slashCommandResult) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
query = slashCommandResult as Part[];
} else {
return;
}
}

Expand Down
5 changes: 5 additions & 0 deletions packages/cli/src/nonInteractiveCliCommands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ export const handleSlashCommand = async (
switch (result.type) {
case 'submit_prompt':
return result.content;
case 'message': {
const prefix = result.messageType?.toUpperCase() || 'INFO';
process.stdout.write(`[${prefix}] ${result.content}\n`);
return;
}
case 'confirm_shell_commands':
// This result indicates a command attempted to confirm shell commands.
// However note that currently, ShellTool is excluded in non-interactive
Expand Down
7 changes: 6 additions & 1 deletion packages/cli/src/services/BuiltinCommandLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ import { shellsCommand } from '../ui/commands/shellsCommand.js';
import { vimCommand } from '../ui/commands/vimCommand.js';
import { setupGithubCommand } from '../ui/commands/setupGithubCommand.js';
import { terminalSetupCommand } from '../ui/commands/terminalSetupCommand.js';
import { voiceCommand } from '../ui/commands/voiceCommand.js';

/**
* Loads the core, hard-coded slash commands that are an integral part
Expand All @@ -73,7 +74,10 @@ export class BuiltinCommandLoader implements ICommandLoader {
* @param signal An AbortSignal; if it is already aborted, no commands are loaded and an empty array is returned.
* @returns A promise that resolves to an array of `SlashCommand` objects.
*/
async loadCommands(_signal: AbortSignal): Promise<SlashCommand[]> {
async loadCommands(signal: AbortSignal): Promise<SlashCommand[]> {
if (signal.aborted) {
return [];
}
const handle = startupProfiler.start('load_builtin_commands');

const isNightlyBuild = await isNightly(process.cwd());
Expand Down Expand Up @@ -185,6 +189,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
vimCommand,
setupGithubCommand,
terminalSetupCommand,
voiceCommand,
];
handle?.end();
return allDefinitions.filter((cmd): cmd is SlashCommand => cmd !== null);
Expand Down
71 changes: 45 additions & 26 deletions packages/cli/src/test-utils/render.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import {
import { type HistoryItemToolGroup, StreamingState } from '../ui/types.js';
import { ToolActionsProvider } from '../ui/contexts/ToolActionsContext.js';
import { AskUserActionsProvider } from '../ui/contexts/AskUserActionsContext.js';
import { VoiceContext } from '../ui/contexts/VoiceContext.js';
import type { VoiceInputReturn } from '../ui/hooks/useVoiceInput.js';
import { TerminalProvider } from '../ui/contexts/TerminalContext.js';
import {
OverflowProvider,
Expand Down Expand Up @@ -554,6 +556,19 @@ export const mockAppState: AppState = {
startupWarnings: [],
};

/** Creates a fresh async no-op mock for one voice action. */
const createVoiceActionMock = () => vi.fn(async (): Promise<void> => {});

/**
 * Default value supplied to `VoiceContext` by the test render helpers:
 * voice input is enabled, nothing is being recorded or transcribed, no error
 * is set, and every action is an independent async no-op mock.
 */
const mockVoiceReturn: VoiceInputReturn = {
  isEnabled: true,
  state: { isRecording: false, isTranscribing: false, error: null },
  startRecording: createVoiceActionMock(),
  stopRecording: createVoiceActionMock(),
  cancelRecording: createVoiceActionMock(),
  toggleRecording: createVoiceActionMock(),
};

const mockUIActions: UIActions = {
handleThemeSelect: vi.fn(),
closeThemeDialog: vi.fn(),
Expand Down Expand Up @@ -634,6 +649,7 @@ export const renderWithProviders = (
uiActions,
persistentState,
appState = mockAppState,
voice = mockVoiceReturn,
}: {
shellFocus?: boolean;
settings?: LoadedSettings;
Expand All @@ -648,6 +664,7 @@ export const renderWithProviders = (
set?: typeof persistentStateMock.set;
};
appState?: AppState;
voice?: VoiceInputReturn;
} = {},
): RenderInstance & {
simulateClick: (
Expand Down Expand Up @@ -741,32 +758,34 @@ export const renderWithProviders = (
config={config}
toolCalls={allToolCalls}
>
<AskUserActionsProvider
request={null}
onSubmit={vi.fn()}
onCancel={vi.fn()}
>
<KeypressProvider>
<MouseProvider
mouseEventsEnabled={mouseEventsEnabled}
>
<TerminalProvider>
<ScrollProvider>
<ContextCapture>
<Box
width={terminalWidth}
flexShrink={0}
flexGrow={0}
flexDirection="column"
>
{component}
</Box>
</ContextCapture>
</ScrollProvider>
</TerminalProvider>
</MouseProvider>
</KeypressProvider>
</AskUserActionsProvider>
<VoiceContext.Provider value={voice}>
<AskUserActionsProvider
request={null}
onSubmit={vi.fn()}
onCancel={vi.fn()}
>
<KeypressProvider>
<MouseProvider
mouseEventsEnabled={mouseEventsEnabled}
>
<TerminalProvider>
<ScrollProvider>
<ContextCapture>
<Box
width={terminalWidth}
flexShrink={0}
flexGrow={0}
flexDirection="column"
>
{component}
</Box>
</ContextCapture>
</ScrollProvider>
</TerminalProvider>
</MouseProvider>
</KeypressProvider>
</AskUserActionsProvider>
</VoiceContext.Provider>
</ToolActionsProvider>
</OverflowProvider>
</UIActionsContext.Provider>
Expand Down
Loading