Skip to content

Commit 5d4baad

Browse files
committed
feature #688 [Platform][ElevenLabs] Use capability-based speech-to-text detection (OskarStark)
This PR was squashed before being merged into the main branch.

Discussion
----------

[Platform][ElevenLabs] Use capability-based speech-to-text detection

| Q             | A
| ------------- | ---
| Bug fix?      | no
| New feature?  | yes
| Docs?         | no
| Issues        | --
| License       | MIT

Replace hardcoded model name checking with API capability detection for speech-to-text models. This makes the code more maintainable and allows for dynamic capability discovery.

<img width="2190" height="690" alt="CleanShot 2025-09-27 at 11 56 06@2x" src="https://github.com/user-attachments/assets/cbdb944d-c4e6-41ad-8a72-dafbe1723389" />

Commits
-------

ad663e5 [Platform][ElevenLabs] Use capability-based speech-to-text detection
2 parents 9986865 + ad663e5 commit 5d4baad

File tree

6 files changed

+182
-12
lines changed

6 files changed

+182
-12
lines changed

src/platform/src/Bridge/ElevenLabs/ElevenLabs.php

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@
1111

1212
namespace Symfony\AI\Platform\Bridge\ElevenLabs;
1313

14+
use Symfony\AI\Platform\Capability;
1415
use Symfony\AI\Platform\Model;
1516

1617
/**
1718
* @author Guillaume Loulier <personal@guillaumeloulier.fr>
1819
*/
1920
final class ElevenLabs extends Model
2021
{
22+
// text-to-speech models
2123
public const ELEVEN_V3 = 'eleven_v3';
2224
public const ELEVEN_TTV_V3 = 'eleven_ttv_v3';
2325
public const ELEVEN_MULTILINGUAL_V2 = 'eleven_multilingual_v2';
@@ -28,11 +30,27 @@ final class ElevenLabs extends Model
2830
public const ELEVEN_MULTILINGUAL_STS_V2 = 'eleven_multilingual_sts_v2';
2931
public const ELEVEN_MULTILINGUAL_ttv_V2 = 'eleven_multilingual_ttv_v2';
3032
public const ELEVEN_ENGLISH_STS_V2 = 'eleven_english_sts_v2';
33+
34+
// speech-to-text models
3135
public const SCRIBE_V1 = 'scribe_v1';
3236
public const SCRIBE_V1_EXPERIMENTAL = 'scribe_v1_experimental';
3337

3438
public function __construct(string $name, array $options = [])
3539
{
36-
parent::__construct($name, [], $options);
40+
$capabilities = [
41+
Capability::INPUT_TEXT,
42+
Capability::OUTPUT_AUDIO,
43+
Capability::TEXT_TO_SPEECH,
44+
];
45+
46+
if (\in_array($name, [self::SCRIBE_V1, self::SCRIBE_V1_EXPERIMENTAL], true)) {
47+
$capabilities = [
48+
Capability::INPUT_AUDIO,
49+
Capability::OUTPUT_TEXT,
50+
Capability::SPEECH_TO_TEXT,
51+
];
52+
}
53+
54+
parent::__construct($name, $capabilities, $options);
3755
}
3856
}

src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
namespace Symfony\AI\Platform\Bridge\ElevenLabs;
1313

14+
use Symfony\AI\Platform\Capability;
1415
use Symfony\AI\Platform\Exception\InvalidArgumentException;
1516
use Symfony\AI\Platform\Model;
1617
use Symfony\AI\Platform\ModelClientInterface;
@@ -41,7 +42,7 @@ public function request(Model $model, array|string $payload, array $options = []
4142
throw new InvalidArgumentException(\sprintf('The payload must be an array, received "%s".', get_debug_type($payload)));
4243
}
4344

44-
if (\in_array($model->getName(), [ElevenLabs::SCRIBE_V1, ElevenLabs::SCRIBE_V1_EXPERIMENTAL], true)) {
45+
if ($model->supports(Capability::SPEECH_TO_TEXT)) {
4546
return $this->doSpeechToTextRequest($model, $payload);
4647
}
4748

src/platform/src/Capability.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@
1111

1212
namespace Symfony\AI\Platform;
1313

14+
use OskarStark\Enum\Trait\Comparable;
15+
1416
/**
1517
* @author Christopher Hertel <mail@christopher-hertel.de>
1618
*/
1719
enum Capability: string
1820
{
21+
use Comparable;
22+
1923
// INPUT
2024
case INPUT_AUDIO = 'input-audio';
2125
case INPUT_IMAGE = 'input-image';

src/platform/src/Model.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public function getCapabilities(): array
5151

5252
public function supports(Capability $capability): bool
5353
{
54-
return \in_array($capability, $this->capabilities, true);
54+
return $capability->equalsOneOf($this->capabilities);
5555
}
5656

5757
/**

src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ public function testSupportsModel()
3030
$client = new ElevenLabsClient(
3131
new MockHttpClient(),
3232
'my-api-key',
33-
'https://api.elevenlabs.io/v1',
3433
);
3534

3635
$this->assertTrue($client->supports(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2)));
@@ -53,7 +52,6 @@ public function testClientCannotPerformWithInvalidModel()
5352
$client = new ElevenLabsClient(
5453
$mockHttpClient,
5554
'my-api-key',
56-
'https://api.elevenlabs.io/v1',
5755
);
5856

5957
$payload = $normalizer->normalize(Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3'));
@@ -69,7 +67,6 @@ public function testClientCannotPerformSpeechToTextRequestWithInvalidPayload()
6967
$client = new ElevenLabsClient(
7068
new MockHttpClient(),
7169
'my-api-key',
72-
'https://api.elevenlabs.io/v1',
7370
);
7471

7572
$this->expectException(InvalidArgumentException::class);
@@ -90,7 +87,6 @@ public function testClientCanPerformSpeechToTextRequest()
9087
$client = new ElevenLabsClient(
9188
$httpClient,
9289
'my-api-key',
93-
'https://api.elevenlabs.io/v1',
9490
);
9591

9692
$payload = $normalizer->normalize(Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3'));
@@ -100,6 +96,27 @@ public function testClientCanPerformSpeechToTextRequest()
10096
$this->assertSame(1, $httpClient->getRequestsCount());
10197
}
10298

99+
/**
 * The experimental Scribe model must be routed like any other
 * speech-to-text model: exactly one HTTP request is sent for the audio payload.
 */
public function testClientCanPerformSpeechToTextRequestWithExperimentalModel()
{
    // A single canned transcription is enough; the test only counts requests.
    $transcription = new JsonMockResponse(['text' => 'foo']);
    $mockedTransport = new MockHttpClient([$transcription]);

    $audioPath = \dirname(__DIR__, 5).'/fixtures/audio.mp3';
    $normalizedAudio = (new AudioNormalizer())->normalize(Audio::fromFile($audioPath));

    $experimentalModel = new ElevenLabs(ElevenLabs::SCRIBE_V1_EXPERIMENTAL);
    $elevenLabsClient = new ElevenLabsClient($mockedTransport, 'my-api-key');

    $elevenLabsClient->request($experimentalModel, $normalizedAudio);

    $this->assertSame(1, $mockedTransport->getRequestsCount());
}
119+
103120
public function testClientCannotPerformTextToSpeechRequestWithoutValidPayload()
104121
{
105122
$mockHttpClient = new MockHttpClient([
@@ -115,7 +132,6 @@ public function testClientCannotPerformTextToSpeechRequestWithoutValidPayload()
115132
$client = new ElevenLabsClient(
116133
$mockHttpClient,
117134
'my-api-key',
118-
'https://api.elevenlabs.io/v1',
119135
);
120136

121137
$this->expectException(InvalidArgumentException::class);
@@ -143,7 +159,6 @@ public function testClientCanPerformTextToSpeechRequest()
143159
$client = new ElevenLabsClient(
144160
$httpClient,
145161
'my-api-key',
146-
'https://api.elevenlabs.io/v1',
147162
);
148163

149164
$client->request(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2, [
@@ -172,7 +187,6 @@ public function testClientCanPerformTextToSpeechRequestWhenVoiceKeyIsProvidedAsR
172187
$client = new ElevenLabsClient(
173188
$httpClient,
174189
'my-api-key',
175-
'https://api.elevenlabs.io/v1',
176190
);
177191

178192
$client->request(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2), [
@@ -201,7 +215,6 @@ public function testClientCanPerformTextToSpeechRequestAsStream()
201215
$client = new ElevenLabsClient(
202216
$httpClient,
203217
'my-api-key',
204-
'https://api.elevenlabs.io/v1',
205218
);
206219

207220
$result = $client->request(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2, [
@@ -232,7 +245,6 @@ public function testClientCanPerformTextToSpeechRequestAsStreamVoiceKeyIsProvide
232245
$client = new ElevenLabsClient(
233246
$httpClient,
234247
'my-api-key',
235-
'https://api.elevenlabs.io/v1',
236248
);
237249

238250
$result = $client->request(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2), [
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs;
13+
14+
use PHPUnit\Framework\Attributes\DataProvider;
15+
use PHPUnit\Framework\TestCase;
16+
use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs;
17+
use Symfony\AI\Platform\Capability;
18+
19+
/**
 * Checks the capabilities an ElevenLabs model derives from its name:
 * the Scribe models are speech-to-text, every other model is text-to-speech.
 */
final class ElevenLabsTest extends TestCase
{
    /** Capabilities expected on speech-to-text (Scribe) models. */
    private const SPEECH_TO_TEXT_CAPABILITIES = [
        Capability::INPUT_AUDIO,
        Capability::OUTPUT_TEXT,
        Capability::SPEECH_TO_TEXT,
    ];

    /** Capabilities expected on text-to-speech models. */
    private const TEXT_TO_SPEECH_CAPABILITIES = [
        Capability::INPUT_TEXT,
        Capability::OUTPUT_AUDIO,
        Capability::TEXT_TO_SPEECH,
    ];

    public function testSpeechToTextModelHasCorrectCapabilities()
    {
        $this->assertCapabilitySplit(
            new ElevenLabs(ElevenLabs::SCRIBE_V1),
            self::SPEECH_TO_TEXT_CAPABILITIES,
            self::TEXT_TO_SPEECH_CAPABILITIES,
        );
    }

    public function testSpeechToTextExperimentalModelHasCorrectCapabilities()
    {
        $this->assertCapabilitySplit(
            new ElevenLabs(ElevenLabs::SCRIBE_V1_EXPERIMENTAL),
            self::SPEECH_TO_TEXT_CAPABILITIES,
            self::TEXT_TO_SPEECH_CAPABILITIES,
        );
    }

    public function testTextToSpeechModelHasCorrectCapabilities()
    {
        $this->assertCapabilitySplit(
            new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2),
            self::TEXT_TO_SPEECH_CAPABILITIES,
            self::SPEECH_TO_TEXT_CAPABILITIES,
        );
    }

    public function testGetCapabilitiesReturnsSpeechToTextCapabilities()
    {
        $model = new ElevenLabs(ElevenLabs::SCRIBE_V1);

        $capabilities = $model->getCapabilities();

        $this->assertCount(3, $capabilities);
        $this->assertContains(Capability::INPUT_AUDIO, $capabilities);
        $this->assertContains(Capability::OUTPUT_TEXT, $capabilities);
        $this->assertContains(Capability::SPEECH_TO_TEXT, $capabilities);
    }

    public function testGetCapabilitiesReturnsTextToSpeechCapabilities()
    {
        $model = new ElevenLabs(ElevenLabs::ELEVEN_V3);

        $capabilities = $model->getCapabilities();

        $this->assertCount(3, $capabilities);
        $this->assertContains(Capability::INPUT_TEXT, $capabilities);
        $this->assertContains(Capability::OUTPUT_AUDIO, $capabilities);
        $this->assertContains(Capability::TEXT_TO_SPEECH, $capabilities);
    }

    public function testModelNameIsCorrectlySet()
    {
        $model = new ElevenLabs(ElevenLabs::SCRIBE_V1);

        $this->assertSame(ElevenLabs::SCRIBE_V1, $model->getName());
    }

    public function testModelOptionsAreCorrectlySet()
    {
        $options = ['voice' => 'test-voice', 'speed' => 1.2];
        $model = new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2, $options);

        $this->assertSame($options, $model->getOptions());
    }

    #[DataProvider('speechToTextModelProvider')]
    public function testAllSpeechToTextModelsHaveCorrectCapabilities(string $modelName)
    {
        // Also assert the absence of text-to-speech capabilities, so a model
        // carrying both sets cannot slip through.
        $this->assertCapabilitySplit(
            new ElevenLabs($modelName),
            self::SPEECH_TO_TEXT_CAPABILITIES,
            self::TEXT_TO_SPEECH_CAPABILITIES,
        );
    }

    #[DataProvider('textToSpeechModelProvider')]
    public function testAllTextToSpeechModelsHaveCorrectCapabilities(string $modelName)
    {
        // Also assert the absence of speech-to-text capabilities: a client that
        // routes on Capability::SPEECH_TO_TEXT would otherwise pick the wrong
        // endpoint for these models.
        $this->assertCapabilitySplit(
            new ElevenLabs($modelName),
            self::TEXT_TO_SPEECH_CAPABILITIES,
            self::SPEECH_TO_TEXT_CAPABILITIES,
        );
    }

    /**
     * Datasets are keyed by constant name so a failure names the model.
     *
     * @return iterable<string, array{string}>
     */
    public static function speechToTextModelProvider(): iterable
    {
        yield 'SCRIBE_V1' => [ElevenLabs::SCRIBE_V1];
        yield 'SCRIBE_V1_EXPERIMENTAL' => [ElevenLabs::SCRIBE_V1_EXPERIMENTAL];
    }

    /**
     * Datasets are keyed by constant name so a failure names the model.
     *
     * @return iterable<string, array{string}>
     */
    public static function textToSpeechModelProvider(): iterable
    {
        yield 'ELEVEN_V3' => [ElevenLabs::ELEVEN_V3];
        yield 'ELEVEN_TTV_V3' => [ElevenLabs::ELEVEN_TTV_V3];
        yield 'ELEVEN_MULTILINGUAL_V2' => [ElevenLabs::ELEVEN_MULTILINGUAL_V2];
        yield 'ELEVEN_FLASH_V250' => [ElevenLabs::ELEVEN_FLASH_V250];
        yield 'ELEVEN_FLASH_V2' => [ElevenLabs::ELEVEN_FLASH_V2];
        yield 'ELEVEN_TURBO_V2_5' => [ElevenLabs::ELEVEN_TURBO_V2_5];
        yield 'ELEVEN_TURBO_v2' => [ElevenLabs::ELEVEN_TURBO_v2];
        yield 'ELEVEN_MULTILINGUAL_STS_V2' => [ElevenLabs::ELEVEN_MULTILINGUAL_STS_V2];
        yield 'ELEVEN_MULTILINGUAL_ttv_V2' => [ElevenLabs::ELEVEN_MULTILINGUAL_ttv_V2];
        yield 'ELEVEN_ENGLISH_STS_V2' => [ElevenLabs::ELEVEN_ENGLISH_STS_V2];
    }

    /**
     * Asserts that the model supports every capability in $supported and none in $unsupported.
     *
     * @param list<Capability> $supported
     * @param list<Capability> $unsupported
     */
    private function assertCapabilitySplit(ElevenLabs $model, array $supported, array $unsupported): void
    {
        foreach ($supported as $capability) {
            $this->assertTrue(
                $model->supports($capability),
                \sprintf('Model "%s" should support "%s".', $model->getName(), $capability->value),
            );
        }

        foreach ($unsupported as $capability) {
            $this->assertFalse(
                $model->supports($capability),
                \sprintf('Model "%s" should not support "%s".', $model->getName(), $capability->value),
            );
        }
    }
}

0 commit comments

Comments
 (0)