feature #688 [Platform][ElevenLabs] Use capability-based speech-to-text detection (OskarStark)

OskarStark · OskarStark · commit 5d4baada15d1 · 2025-09-27T12:07:56.000+02:00
This PR was squashed before being merged into the main branch.

Discussion
----------

[Platform][ElevenLabs] Use capability-based speech-to-text detection

| Q             | A
| ------------- | ---
| Bug fix?      | no
| New feature?  | yes
| Docs?         | no
| Issues        | --
| License       | MIT

Replace hardcoded model name checking with API capability detection for speech-to-text models. This makes the code more maintainable and allows for dynamic capability discovery.

<img width="2190" height="690" alt="CleanShot 2025-09-27 at 11 56 06@2x" src="https://github.com/user-attachments/assets/cbdb944d-c4e6-41ad-8a72-dafbe1723389" />

Commits
-------

ad663e5 [Platform][ElevenLabs] Use capability-based speech-to-text detection
diff --git a/src/platform/src/Bridge/ElevenLabs/ElevenLabs.php b/src/platform/src/Bridge/ElevenLabs/ElevenLabs.php
@@ -11,13 +11,15 @@
 
 namespace Symfony\AI\Platform\Bridge\ElevenLabs;
 
+use Symfony\AI\Platform\Capability;
 use Symfony\AI\Platform\Model;
 
 /**
  * @author Guillaume Loulier <personal@guillaumeloulier.fr>
  */
 final class ElevenLabs extends Model
 {
+    // text-to-speech models
     public const ELEVEN_V3 = 'eleven_v3';
     public const ELEVEN_TTV_V3 = 'eleven_ttv_v3';
     public const ELEVEN_MULTILINGUAL_V2 = 'eleven_multilingual_v2';
@@ -28,11 +30,27 @@ final class ElevenLabs extends Model
     public const ELEVEN_MULTILINGUAL_STS_V2 = 'eleven_multilingual_sts_v2';
     public const ELEVEN_MULTILINGUAL_ttv_V2 = 'eleven_multilingual_ttv_v2';
     public const ELEVEN_ENGLISH_STS_V2 = 'eleven_english_sts_v2';
+
+    // speech-to-text models
     public const SCRIBE_V1 = 'scribe_v1';
     public const SCRIBE_V1_EXPERIMENTAL = 'scribe_v1_experimental';
 
     public function __construct(string $name, array $options = [])
     {
-        parent::__construct($name, [], $options);
+        $capabilities = [
+            Capability::INPUT_TEXT,
+            Capability::OUTPUT_AUDIO,
+            Capability::TEXT_TO_SPEECH,
+        ];
+
+        if (\in_array($name, [self::SCRIBE_V1, self::SCRIBE_V1_EXPERIMENTAL], true)) {
+            $capabilities = [
+                Capability::INPUT_AUDIO,
+                Capability::OUTPUT_TEXT,
+                Capability::SPEECH_TO_TEXT,
+            ];
+        }
+
+        parent::__construct($name, $capabilities, $options);
     }
 }
diff --git a/src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php b/src/platform/src/Bridge/ElevenLabs/ElevenLabsClient.php
@@ -11,6 +11,7 @@
 
 namespace Symfony\AI\Platform\Bridge\ElevenLabs;
 
+use Symfony\AI\Platform\Capability;
 use Symfony\AI\Platform\Exception\InvalidArgumentException;
 use Symfony\AI\Platform\Model;
 use Symfony\AI\Platform\ModelClientInterface;
@@ -41,7 +42,7 @@ public function request(Model $model, array|string $payload, array $options = []
             throw new InvalidArgumentException(\sprintf('The payload must be an array, received "%s".', get_debug_type($payload)));
         }
 
-        if (\in_array($model->getName(), [ElevenLabs::SCRIBE_V1, ElevenLabs::SCRIBE_V1_EXPERIMENTAL], true)) {
+        if ($model->supports(Capability::SPEECH_TO_TEXT)) {
             return $this->doSpeechToTextRequest($model, $payload);
         }
 
diff --git a/src/platform/src/Capability.php b/src/platform/src/Capability.php
@@ -11,11 +11,15 @@
 
 namespace Symfony\AI\Platform;
 
+use OskarStark\Enum\Trait\Comparable;
+
 /**
  * @author Christopher Hertel <mail@christopher-hertel.de>
  */
 enum Capability: string
 {
+    use Comparable;
+
     // INPUT
     case INPUT_AUDIO = 'input-audio';
     case INPUT_IMAGE = 'input-image';
diff --git a/src/platform/src/Model.php b/src/platform/src/Model.php
@@ -51,7 +51,7 @@ public function getCapabilities(): array
 
     public function supports(Capability $capability): bool
     {
-        return \in_array($capability, $this->capabilities, true);
+        return $capability->equalsOneOf($this->capabilities);
     }
 
     /**
diff --git a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsClientTest.php
@@ -30,7 +30,6 @@ public function testSupportsModel()
         $client = new ElevenLabsClient(
             new MockHttpClient(),
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $this->assertTrue($client->supports(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2)));
@@ -53,7 +52,6 @@ public function testClientCannotPerformWithInvalidModel()
         $client = new ElevenLabsClient(
             $mockHttpClient,
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $payload = $normalizer->normalize(Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3'));
@@ -69,7 +67,6 @@ public function testClientCannotPerformSpeechToTextRequestWithInvalidPayload()
         $client = new ElevenLabsClient(
             new MockHttpClient(),
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $this->expectException(InvalidArgumentException::class);
@@ -90,7 +87,6 @@ public function testClientCanPerformSpeechToTextRequest()
         $client = new ElevenLabsClient(
             $httpClient,
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $payload = $normalizer->normalize(Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3'));
@@ -100,6 +96,27 @@ public function testClientCanPerformSpeechToTextRequest()
         $this->assertSame(1, $httpClient->getRequestsCount());
     }
 
+    /**
+     * Sends audio with the experimental Scribe model and expects one HTTP request.
+     */
+    public function testClientCanPerformSpeechToTextRequestWithExperimentalModel()
+    {
+        // A single canned transcription is all the mocked transport will serve.
+        $transcription = new JsonMockResponse(['text' => 'foo']);
+        $httpClient = new MockHttpClient([$transcription]);
+
+        $client = new ElevenLabsClient($httpClient, 'my-api-key');
+
+        $audio = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3');
+        $payload = (new AudioNormalizer())->normalize($audio);
+        $model = new ElevenLabs(ElevenLabs::SCRIBE_V1_EXPERIMENTAL);
+
+        $client->request($model, $payload);
+
+        // The request must have resulted in exactly one HTTP call.
+        $this->assertSame(1, $httpClient->getRequestsCount());
+    }
+
     public function testClientCannotPerformTextToSpeechRequestWithoutValidPayload()
     {
         $mockHttpClient = new MockHttpClient([
@@ -115,7 +132,6 @@ public function testClientCannotPerformTextToSpeechRequestWithoutValidPayload()
         $client = new ElevenLabsClient(
             $mockHttpClient,
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $this->expectException(InvalidArgumentException::class);
@@ -143,7 +159,6 @@ public function testClientCanPerformTextToSpeechRequest()
         $client = new ElevenLabsClient(
             $httpClient,
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $client->request(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2, [
@@ -172,7 +187,6 @@ public function testClientCanPerformTextToSpeechRequestWhenVoiceKeyIsProvidedAsR
         $client = new ElevenLabsClient(
             $httpClient,
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $client->request(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2), [
@@ -201,7 +215,6 @@ public function testClientCanPerformTextToSpeechRequestAsStream()
         $client = new ElevenLabsClient(
             $httpClient,
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $result = $client->request(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2, [
@@ -232,7 +245,6 @@ public function testClientCanPerformTextToSpeechRequestAsStreamVoiceKeyIsProvide
         $client = new ElevenLabsClient(
             $httpClient,
             'my-api-key',
-            'https://api.elevenlabs.io/v1',
         );
 
         $result = $client->request(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2), [
diff --git a/src/platform/tests/Bridge/ElevenLabs/ElevenLabsTest.php b/src/platform/tests/Bridge/ElevenLabs/ElevenLabsTest.php
@@ -0,0 +1,157 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\AI\Platform\Tests\Bridge\ElevenLabs;
+
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\TestCase;
+use Symfony\AI\Platform\Bridge\ElevenLabs\ElevenLabs;
+use Symfony\AI\Platform\Capability;
+
+/**
+ * Checks that an ElevenLabs model reports the capability set matching its name:
+ * speech-to-text for the Scribe models, text-to-speech for every other model.
+ */
+final class ElevenLabsTest extends TestCase
+{
+    private const SPEECH_TO_TEXT_CAPABILITIES = [
+        Capability::INPUT_AUDIO,
+        Capability::OUTPUT_TEXT,
+        Capability::SPEECH_TO_TEXT,
+    ];
+
+    private const TEXT_TO_SPEECH_CAPABILITIES = [
+        Capability::INPUT_TEXT,
+        Capability::OUTPUT_AUDIO,
+        Capability::TEXT_TO_SPEECH,
+    ];
+
+    public function testSpeechToTextModelHasCorrectCapabilities()
+    {
+        $this->assertSpeechToTextOnly(new ElevenLabs(ElevenLabs::SCRIBE_V1));
+    }
+
+    public function testSpeechToTextExperimentalModelHasCorrectCapabilities()
+    {
+        $this->assertSpeechToTextOnly(new ElevenLabs(ElevenLabs::SCRIBE_V1_EXPERIMENTAL));
+    }
+
+    public function testTextToSpeechModelHasCorrectCapabilities()
+    {
+        $this->assertTextToSpeechOnly(new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2));
+    }
+
+    public function testGetCapabilitiesReturnsSpeechToTextCapabilities()
+    {
+        $capabilities = (new ElevenLabs(ElevenLabs::SCRIBE_V1))->getCapabilities();
+
+        $this->assertCount(3, $capabilities);
+        foreach (self::SPEECH_TO_TEXT_CAPABILITIES as $capability) {
+            $this->assertContains($capability, $capabilities);
+        }
+    }
+
+    public function testGetCapabilitiesReturnsTextToSpeechCapabilities()
+    {
+        $capabilities = (new ElevenLabs(ElevenLabs::ELEVEN_V3))->getCapabilities();
+
+        $this->assertCount(3, $capabilities);
+        foreach (self::TEXT_TO_SPEECH_CAPABILITIES as $capability) {
+            $this->assertContains($capability, $capabilities);
+        }
+    }
+
+    public function testModelNameIsCorrectlySet()
+    {
+        $model = new ElevenLabs(ElevenLabs::SCRIBE_V1);
+
+        $this->assertSame(ElevenLabs::SCRIBE_V1, $model->getName());
+    }
+
+    public function testModelOptionsAreCorrectlySet()
+    {
+        $options = ['voice' => 'test-voice', 'speed' => 1.2];
+        $model = new ElevenLabs(ElevenLabs::ELEVEN_MULTILINGUAL_V2, $options);
+
+        $this->assertSame($options, $model->getOptions());
+    }
+
+    #[DataProvider('speechToTextModelProvider')]
+    public function testAllSpeechToTextModelsHaveCorrectCapabilities(string $modelName)
+    {
+        // Also rejects the text-to-speech capabilities, for every listed model.
+        $this->assertSpeechToTextOnly(new ElevenLabs($modelName));
+    }
+
+    #[DataProvider('textToSpeechModelProvider')]
+    public function testAllTextToSpeechModelsHaveCorrectCapabilities(string $modelName)
+    {
+        // Also rejects the speech-to-text capabilities, for every listed model.
+        $this->assertTextToSpeechOnly(new ElevenLabs($modelName));
+    }
+
+    /**
+     * @return iterable<string, array{string}> data sets keyed by constant name
+     */
+    public static function speechToTextModelProvider(): iterable
+    {
+        yield 'SCRIBE_V1' => [ElevenLabs::SCRIBE_V1];
+        yield 'SCRIBE_V1_EXPERIMENTAL' => [ElevenLabs::SCRIBE_V1_EXPERIMENTAL];
+    }
+
+    /**
+     * @return iterable<string, array{string}> data sets keyed by constant name
+     */
+    public static function textToSpeechModelProvider(): iterable
+    {
+        yield 'ELEVEN_V3' => [ElevenLabs::ELEVEN_V3];
+        yield 'ELEVEN_TTV_V3' => [ElevenLabs::ELEVEN_TTV_V3];
+        yield 'ELEVEN_MULTILINGUAL_V2' => [ElevenLabs::ELEVEN_MULTILINGUAL_V2];
+        yield 'ELEVEN_FLASH_V250' => [ElevenLabs::ELEVEN_FLASH_V250];
+        yield 'ELEVEN_FLASH_V2' => [ElevenLabs::ELEVEN_FLASH_V2];
+        yield 'ELEVEN_TURBO_V2_5' => [ElevenLabs::ELEVEN_TURBO_V2_5];
+        yield 'ELEVEN_TURBO_v2' => [ElevenLabs::ELEVEN_TURBO_v2];
+        yield 'ELEVEN_MULTILINGUAL_STS_V2' => [ElevenLabs::ELEVEN_MULTILINGUAL_STS_V2];
+        yield 'ELEVEN_MULTILINGUAL_ttv_V2' => [ElevenLabs::ELEVEN_MULTILINGUAL_ttv_V2];
+        yield 'ELEVEN_ENGLISH_STS_V2' => [ElevenLabs::ELEVEN_ENGLISH_STS_V2];
+    }
+
+    /**
+     * Asserts the model supports the speech-to-text capabilities and none of the text-to-speech ones.
+     */
+    private function assertSpeechToTextOnly(ElevenLabs $model): void
+    {
+        $this->assertCapabilities($model, self::SPEECH_TO_TEXT_CAPABILITIES, self::TEXT_TO_SPEECH_CAPABILITIES);
+    }
+
+    /**
+     * Asserts the model supports the text-to-speech capabilities and none of the speech-to-text ones.
+     */
+    private function assertTextToSpeechOnly(ElevenLabs $model): void
+    {
+        $this->assertCapabilities($model, self::TEXT_TO_SPEECH_CAPABILITIES, self::SPEECH_TO_TEXT_CAPABILITIES);
+    }
+
+    /**
+     * @param list<Capability> $supported   capabilities supports() must accept
+     * @param list<Capability> $unsupported capabilities supports() must reject
+     */
+    private function assertCapabilities(ElevenLabs $model, array $supported, array $unsupported): void
+    {
+        foreach ($supported as $capability) {
+            $this->assertTrue($model->supports($capability), \sprintf('"%s" should support "%s".', $model->getName(), $capability->value));
+        }
+
+        foreach ($unsupported as $capability) {
+            $this->assertFalse($model->supports($capability), \sprintf('"%s" should not support "%s".', $model->getName(), $capability->value));
+        }
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,7 @@`
`11`	`11`
`12`	`12`	`namespace Symfony\AI\Platform\Bridge\ElevenLabs;`
`13`	`13`
	`14`	`+use Symfony\AI\Platform\Capability;`
`14`	`15`	`use Symfony\AI\Platform\Exception\InvalidArgumentException;`
`15`	`16`	`use Symfony\AI\Platform\Model;`
`16`	`17`	`use Symfony\AI\Platform\ModelClientInterface;`
`@@ -41,7 +42,7 @@ public function request(Model $model, array\|string $payload, array $options = []`
`41`	`42`	`throw new InvalidArgumentException(\sprintf('The payload must be an array, received "%s".', get_debug_type($payload)));`
`42`	`43`	`}`
`43`	`44`
`44`		`- if (\in_array($model->getName(), [ElevenLabs::SCRIBE_V1, ElevenLabs::SCRIBE_V1_EXPERIMENTAL], true)) {`
	`45`	`+ if ($model->supports(Capability::SPEECH_TO_TEXT)) {`
`45`	`46`	`return $this->doSpeechToTextRequest($model, $payload);`
`46`	`47`	`}`
`47`	`48`
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ public function getCapabilities(): array`
`51`	`51`
`52`	`52`	`public function supports(Capability $capability): bool`
`53`	`53`	`{`
`54`		`- return \in_array($capability, $this->capabilities, true);`
	`54`	`+ return $capability->equalsOneOf($this->capabilities);`
`55`	`55`	`}`
`56`	`56`
`57`	`57`	`/**`