Skip to content

Commit 8d9a9e2

Browse files
committed
move fal smart detection to plugin
1 parent 73ddc8e commit 8d9a9e2

File tree

17 files changed

+196
-167
lines changed

17 files changed

+196
-167
lines changed

agents-core/vision_agents/core/turn_detection/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
TurnStartedEvent,
99
TurnEndedEvent,
1010
)
11-
from .fal_turn_detection import FalTurnDetection
1211

1312

1413
__all__ = [
@@ -20,6 +19,4 @@
2019
# Events
2120
"TurnStartedEvent",
2221
"TurnEndedEvent",
23-
# Implementations
24-
"FalTurnDetection",
2522
]

plugins/fal/README.md

Lines changed: 0 additions & 57 deletions
This file was deleted.

plugins/getstream/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ keywords = ["getstream", "video", "realtime", "streaming", "AI", "voice agents",
1111
requires-python = ">=3.10"
1212
license = "MIT"
1313
dependencies = [
14-
"getstream[webrtc,telemetry]>=2.5.0",
1514
"vision-agents",
15+
"getstream[webrtc,telemetry]>=2.5.0",
1616
]
1717

1818
[project.urls]

plugins/smart_turn/README.md

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Smart Turn Detection Plugin
2+
3+
An AI-powered turn detection plugin for Vision Agents that uses the [Smart Turn model](https://github.com/pipecat-ai/smart-turn) to detect when a speaker has completed their turn in a conversation.
4+
5+
## Overview
6+
7+
Smart Turn is an open-source, community-driven, native audio turn detection model that goes beyond simple Voice Activity Detection (VAD). It analyzes grammar, tone, pace of speech, and various other complex audio and semantic cues to determine when a user has finished speaking, matching human expectations more closely than VAD-based approaches.
8+
9+
For more information about the Smart Turn model, visit the [official repository](https://github.com/pipecat-ai/smart-turn).
10+
11+
## Installation
12+
13+
```bash
14+
pip install vision-agents-plugins-smart-turn
15+
```
16+
17+
## Usage
18+
19+
```python
20+
from vision_agents.plugins.smart_turn import TurnDetection
21+
22+
# Initialize using the FAL API key from the FAL_KEY environment variable
23+
turn_detector = TurnDetection()
24+
25+
# Or specify API key directly
26+
turn_detector = TurnDetection(api_key="your_fal_api_key")
27+
28+
# Register event handlers
29+
@turn_detector.on("turn_started")
30+
def on_turn_started(event_data):
31+
print(f"Turn started: {event_data.speaker_id}")
32+
33+
@turn_detector.on("turn_ended")
34+
def on_turn_ended(event_data):
35+
print(f"Turn ended: {event_data.speaker_id} (confidence: {event_data.confidence:.3f})")
36+
37+
# Start detection
38+
turn_detector.start()
39+
40+
# Process audio (process_audio is awaited, so call it from inside an async function)
41+
await turn_detector.process_audio(pcm_data, user_id="user123")
42+
43+
# Stop detection
44+
turn_detector.stop()
45+
```
46+
47+
## Configuration Options
48+
49+
- `api_key`: FAL API key (default: reads from FAL_KEY environment variable)
50+
- `buffer_duration`: Duration in seconds to buffer audio before processing (default: 2.0)
51+
- `confidence_threshold`: Probability threshold for "complete" predictions (default: 0.5)
52+
- `sample_rate`: Audio sample rate in Hz (default: 16000)
53+
- `channels`: Number of audio channels (default: 1)
File renamed without changes.

plugins/smart_turn/pyproject.toml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
[build-system]
2+
requires = ["hatchling"]
3+
build-backend = "hatchling.build"
4+
5+
[project]
6+
name = "vision-agents-plugins-smart-turn"
7+
version = "0.1.0"
8+
description = "Smart Turn detection plugin for Vision Agents"
9+
readme = "README.md"
10+
requires-python = ">=3.10"
11+
license = "MIT"
12+
dependencies = [
13+
"vision-agents",
14+
"fal-client>=0.5.6",
15+
"numpy>=2.2.6,<2.3",
16+
]
17+
18+
[project.urls]
19+
Documentation = "https://visionagents.ai/"
20+
Website = "https://visionagents.ai/"
21+
Source = "https://github.com/GetStream/Vision-Agents"
22+
23+
[tool.hatch.version]
24+
source = "vcs"
25+
raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
26+
27+
[tool.hatch.build.targets.wheel]
28+
packages = [".", "vision_agents"]
29+
30+
[tool.hatch.build.targets.sdist]
31+
include = ["/vision_agents"]
32+
33+
[tool.uv.sources]
34+
vision-agents = { workspace = true }
35+
36+
[dependency-groups]
37+
dev = [
38+
"pytest>=8.4.1",
39+
"pytest-asyncio>=1.0.0",
40+
]
41+
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import pytest
2+
from dotenv import load_dotenv
3+
4+
5+
load_dotenv()
6+
7+
8+
class TestSmartTurnPlugin:
9+
def test_import(self):
10+
"""Test that the plugin can be imported."""
11+
from vision_agents.plugins.smart_turn import TurnDetection
12+
13+
assert TurnDetection is not None
14+
15+
async def test_instantiation(self):
16+
"""Test that the TurnDetection class can be instantiated."""
17+
from vision_agents.plugins.smart_turn import TurnDetection
18+
19+
detector = TurnDetection(api_key="test_key")
20+
assert detector is not None
21+
assert detector.api_key == "test_key"
22+
assert detector.buffer_duration == 2.0
23+
assert detector._confidence_threshold == 0.5
24+
25+
@pytest.mark.integration
26+
async def test_turn_detection_integration(self):
27+
"""Integration test for turn detection (requires FAL_KEY in environment)."""
28+
# This test should be run manually with a valid FAL_KEY
29+
# For now, just pass
30+
assert True
31+
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
from .stt import STT
1+
from .turn_detection import TurnDetection
22

33
# Re-export under the new namespace for convenience
44
__path__ = __import__("pkgutil").extend_path(__path__, __name__)
55

6-
__all__ = ["STT"]
6+
__all__ = ["TurnDetection"]
7+

agents-core/vision_agents/core/turn_detection/fal_turn_detection.py renamed to plugins/smart_turn/vision_agents/plugins/smart_turn/turn_detection.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
FAL Smart-Turn implementation for turn detection using the smart-turn AI model.
2+
Smart Turn detection implementation using the FAL AI smart-turn model.
33
44
This module provides integration with the FAL AI smart-turn model to detect
55
when a speaker has completed their turn in a conversation.
@@ -19,16 +19,19 @@
1919
from getstream.audio.utils import resample_audio
2020
from getstream.video.rtc.track_util import PcmData
2121
from vision_agents.core.utils.utils import to_mono
22-
23-
from .turn_detection import TurnDetector, TurnEvent, TurnEventData
22+
from vision_agents.core.turn_detection.turn_detection import (
23+
TurnDetector,
24+
TurnEvent,
25+
TurnEventData,
26+
)
2427

2528

2629
def _resample(samples: np.ndarray) -> np.ndarray:
2730
"""Resample audio from 48 kHz to 16 kHz."""
2831
return resample_audio(samples, 48000, 16000).astype(np.int16)
2932

3033

31-
class FalTurnDetection(TurnDetector):
34+
class TurnDetection(TurnDetector):
3235
"""
3336
Turn detection implementation using FAL AI's smart-turn model.
3437
@@ -48,7 +51,7 @@ def __init__(
4851
channels: int = 1,
4952
):
5053
"""
51-
Initialize FAL turn detection.
54+
Initialize Smart Turn detection.
5255
5356
Args:
5457
api_key: FAL API key (if None, uses FAL_KEY env var)
@@ -59,10 +62,9 @@ def __init__(
5962
"""
6063

6164
super().__init__(
62-
confidence_threshold=confidence_threshold,
63-
provider_name="FalTurnDetection"
65+
confidence_threshold=confidence_threshold, provider_name="SmartTurnDetection"
6466
)
65-
self.logger = logging.getLogger("FalTurnDetection")
67+
self.logger = logging.getLogger("SmartTurnDetection")
6668
self.api_key = api_key
6769
self.buffer_duration = buffer_duration
6870
self.sample_rate = sample_rate
@@ -75,15 +77,15 @@ def __init__(
7577

7678
# Processing state
7779
self._processing_tasks: Dict[str, asyncio.Task] = {}
78-
self._temp_dir = Path(tempfile.gettempdir()) / "fal_turn_detection"
80+
self._temp_dir = Path(tempfile.gettempdir()) / "smart_turn_detection"
7981
self._temp_dir.mkdir(exist_ok=True)
8082

8183
# Configure FAL client
8284
if self.api_key:
8385
os.environ["FAL_KEY"] = self.api_key
8486

8587
self.logger.info(
86-
f"Initialized FAL turn detection (buffer: {buffer_duration}s, threshold: {confidence_threshold})"
88+
f"Initialized Smart Turn detection (buffer: {buffer_duration}s, threshold: {confidence_threshold})"
8789
)
8890

8991
def _infer_channels(self, format_str: str) -> int:
@@ -94,7 +96,9 @@ def _infer_channels(self, format_str: str) -> int:
9496
elif any(f in format_str for f in ["mono", "s16", "int16", "pcm_s16le"]):
9597
return 1
9698
else:
97-
self.logger.warning(f"Unknown format string: {format_str}. Assuming mono.")
99+
self.logger.warning(
100+
f"Unknown format string: {format_str}. Assuming mono."
101+
)
98102
return 1
99103

100104
def is_detecting(self) -> bool:
@@ -295,7 +299,7 @@ async def _process_turn_prediction(
295299
# Create event data
296300
event_data = TurnEventData(
297301
timestamp=current_time,
298-
speaker_id=user_id, # Now use the user_id directly
302+
speaker_id=user_id,
299303
confidence=probability,
300304
custom={
301305
"prediction": prediction,
@@ -315,7 +319,7 @@ async def _process_turn_prediction(
315319
# Set them as current speaker if they weren't already (in case we missed the start)
316320
if self._current_speaker != user_id:
317321
self._current_speaker = user_id
318-
322+
319323
self._emit_turn_event(TurnEvent.TURN_ENDED, event_data)
320324
self._current_speaker = None
321325

@@ -346,7 +350,7 @@ def start(self) -> None:
346350
if self._is_detecting:
347351
return
348352
self._is_detecting = True
349-
self.logger.info("FAL turn detection started")
353+
self.logger.info("Smart Turn detection started")
350354

351355
def stop(self) -> None:
352356
"""Stop turn detection and clean up."""
@@ -374,4 +378,5 @@ def stop(self) -> None:
374378
except Exception as e:
375379
self.logger.warning(f"Failed to clean up temp files: {e}")
376380

377-
self.logger.info("FAL turn detection stopped")
381+
self.logger.info("Smart Turn detection stopped")
382+

plugins/wizper/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Wizper plugin for Vision Agents.

0 commit comments

Comments
 (0)