haydenbleasel · iamNarcisse · Jan 29, 2025 · Jan 30, 2025 · Jan 30, 2025 · Jan 30, 2025
diff --git a/__tests__/jigsawstack.test.ts b/__tests__/jigsawstack.test.ts
@@ -0,0 +1,30 @@
+import { writeFile } from 'node:fs/promises';
+import { describe, expect, it } from 'vitest';
+import { speak, transcribeWithUrl } from '../src';
+import { jigsawstack } from '../src/jigsawstack';
+
+describe('JigsawStack Tests', () => { // No mocks are set up in this file; jigsawstack.tts()/stt() throw unless JIGSAWSTACK_API_KEY is set
+  it('should convert text to speech', async () => {
+    const speech = await speak({
+      model: jigsawstack.tts("en-US-female-10"),
+      prompt: 'Hello from Orate, the AI toolkit for speech.',
+    });
+
+    await writeFile(
+      './__tests__/output/jigsawstack-speech.wav', // NOTE(review): src/jigsawstack.ts labels the file 'speech.mp3' / 'audio/mpeg' — confirm the .wav extension
+      Buffer.from(await speech.arrayBuffer())
+    );
+
+    expect(speech).toBeInstanceOf(File);
+    expect(speech.size).toBeGreaterThan(0);
+  });
+
+  it('should convert speech to text', async () => {
+    const text = await transcribeWithUrl({ // NOTE(review): depends on the signed sample URL below staying reachable
+      model: jigsawstack.stt(),
+      url:"https://uuvhpoxkzjnrvvajhnyb.supabase.co/storage/v1/object/sign/default/preview/stt-examples/stt_very_short_audio_sample_2.mp3?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1cmwiOiJkZWZhdWx0L3ByZXZpZXcvc3R0LWV4YW1wbGVzL3N0dF92ZXJ5X3Nob3J0X2F1ZGlvX3NhbXBsZV8yLm1wMyIsImlhdCI6MTczMjIwMzIwNywiZXhwIjozMTU1MzAwNjY3MjA3fQ._R0cLbrIx_FUR3CMRYaUMj616diA_1fjWUcVq2vAONg&t=2024-11-21T15%3A33%3A27.154Z",
+    });
+    expect(typeof text).toBe('string');
+    expect(text.length).toBeGreaterThan(0);
+  });
+});
diff --git a/package.json b/package.json
@@ -56,6 +56,7 @@
     "fluent-ffmpeg": "^2.1.3",
     "groq-sdk": "^0.12.0",
     "ibm-watson": "^10.0.0",
+    "jigsawstack": "^0.0.25",
     "ky": "^1.7.4",
     "microsoft-cognitiveservices-speech-sdk": "^1.42.0",
     "openai": "^4.80.0",

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/src/index.ts b/src/index.ts
@@ -85,3 +85,29 @@ export const isolate = async ({
   model,
   audio,
 }: IsolateOptions): Promise<File> => model(audio);
+
+
+
+/**
+ * Options for the transcribeWithUrl function to convert speech at a URL to text.
+ * @interface TranscribeWithUrlOptions
+ * @property {function} model - A function that takes an audio URL and returns a Promise resolving to the transcribed text
+ * @property {string} url - The audio URL to transcribe
+ */
+export type TranscribeWithUrlOptions = {
+  model: (url: string) => Promise<string>;
+  url: string;
+};
+
+
+/**
+ * Transcribes the audio at a URL to text by passing the URL to the provided model.
+ * @param {TranscribeWithUrlOptions} options - The options for speech-to-text transcription from an audio URL
+ * @param {function} options.model - The model function that is called with the URL
+ * @param {string} options.url - The audio URL to transcribe
+ * @returns {Promise<string>} A Promise that resolves to the text returned by the model
+ */
+export const transcribeWithUrl = async ({
+  model,
+  url,
+}: TranscribeWithUrlOptions): Promise<string> => model(url);
diff --git a/src/jigsawstack.ts b/src/jigsawstack.ts
@@ -0,0 +1,74 @@
+import { JigsawStack } from 'jigsawstack';
+
+type JigsawStackType = ReturnType<typeof JigsawStack>;
+type STTParams = Parameters<JigsawStackType['audio']['speech_to_text']>['0'];
+type TTSParams = Parameters<JigsawStackType['audio']['text_to_speech']>['0'];
+
+/**
+ * Creates a JigsawStack provider instance with the API key from environment variables
+ * @returns {JigsawStackType} JigsawStack client created with the JIGSAWSTACK_API_KEY value
+ * @throws {Error} If the JIGSAWSTACK_API_KEY environment variable is not set or is empty
+ */
+const createProvider = () => {
+  const apiKey = process.env.JIGSAWSTACK_API_KEY;
+
+  if (!apiKey) {
+    throw new Error('JIGSAWSTACK_API_KEY is not set');
+  }
+
+  return JigsawStack({ apiKey });
+};
+
+/**
+ * JigsawStack speech-to-text (stt) and text-to-speech (tts) functionality
+ */
+export const jigsawstack = {
+  /**
+   * Creates a speech-to-text transcription function using JigsawStack
+   * @param {Omit<STTParams, 'url'>} options - Additional options spread into the request after the url
+   * @returns {Function} Async function that takes an audio url and returns the transcribed text
+   */
+  stt: (options?: Omit<STTParams, 'url'>) => {
+    const provider = createProvider();
+    /**
+     * Transcribes audio to text using JigsawStack
+     * @param {string} url - The audio url to transcribe
+     * @returns {Promise<string>} The `text` field of the response, returned as-is without validation
+     */
+    return async (url: string) => {
+      const response = await provider.audio.speech_to_text({
+        url,
+        ...options,
+      });
+      return response.text;
+    };
+  },
+
+  /**
+   * Creates a text-to-speech synthesis function using JigsawStack TTS
+   * @param {TTSParams["accent"]} accent - The voice to use for synthesis. Defaults to 'en-US-female-27'
+   * @param {Omit<TTSParams, 'text' | 'accent'>} [properties] - Additional options spread into the request after text and accent
+   * @returns {Function} Async function that takes text and returns synthesized audio
+   */
+  tts: (
+    accent: TTSParams['accent'] = 'en-US-female-27',
+    properties?: Omit<TTSParams, 'text' | 'accent'>
+  ) => {
+    const provider = createProvider();
+
+    /**
+     * Synthesizes text to speech using JigsawStack TTS
+     * @param {string} text - The text to convert to speech
+     * @returns {Promise<File>} The synthesized audio, named 'speech.mp3' with type 'audio/mpeg'
+     */
+    return async (text: string) => {
+      const response = await provider.audio.text_to_speech({
+        text,
+        accent,
+        ...properties,
+      });
+      const file = await response.file('speech.mp3', { type: 'audio/mpeg' });
+      return file;
+    };
+  },
+};
diff --git a/src/openai.ts b/src/openai.ts
@@ -77,6 +77,7 @@ export const openai = {
       const response = await provider.audio.transcriptions.create({
         model,
         file: audio,
+
         ...properties,
       });
 

diff --git a/website/app/(home)/components/providers.tsx b/website/app/(home)/components/providers.tsx
@@ -12,6 +12,7 @@ import Gladia from '../../../public/providers/gladia.svg';
 import Google from '../../../public/providers/google.svg';
 import Groq from '../../../public/providers/groq.svg';
 import IBM from '../../../public/providers/ibm.svg';
+import JigsawStack from '../../../public/providers/jigsaw-stack.svg';
 import Murf from '../../../public/providers/murf.svg';
 import OpenAI from '../../../public/providers/openai.svg';
 import Play from '../../../public/providers/play.svg';
@@ -34,6 +35,7 @@ const providers = [
   { name: 'Replicate', image: Replicate, href: '/docs/replicate' },
   { name: 'Groq', image: Groq, href: '/docs/groq' },
   { name: 'Play', image: Play, href: '/docs/play' },
+  { name: 'JigsawStack', image: JigsawStack, href: '/docs/jigsawstack' },
 ];
 
 export const Providers = () => (

diff --git a/website/content/docs/jigsaw-stack.mdx b/website/content/docs/jigsaw-stack.mdx
@@ -0,0 +1,87 @@
+---
+title: JigsawStack
+description: Orate supports JigsawStack's speech and transcription services.
+---
+
+import { AutoTypeTable } from 'fumadocs-typescript/ui';
+
+JigsawStack offers a suite of small, fast models that automate the boring and complex tasks in every tech stack with specialized fine-tuning.
+
+## Setup
+
+The JigsawStack provider is available by default in Orate. To import it, you can use the following code:
+
+```ts
+import { jigsawstack } from 'orate/jigsawstack';
+```
+
+## Configuration
+
+The JigsawStack provider looks for the `JIGSAWSTACK_API_KEY` environment variable. This variable is required for the provider to work. Simply add the following to your `.env` file:
+
+```bash
+JIGSAWSTACK_API_KEY="your_api_key"
+```
+
+## Usage
+
+The JigsawStack provider provides a single interface for all of JigsawStack's speech and transcription services.
+
+### Text to Speech
+
+The JigsawStack provider provides a `tts` function that allows you to create a text-to-speech synthesis function using JigsawStack TTS. By default, the `tts` function uses the `en-US-female-27` voice (accent).
+
+```ts
+import { speak } from 'orate';
+import { jigsawstack } from 'orate/jigsawstack';
+
+const speech = await speak({
+  model: jigsawstack.tts(),
+  prompt: 'Hello, world!',
+});
+```
+
+You can specify the voice (accent) to use by passing it as the first argument to the `tts` function.
+
+```ts
+const speech = await speak({
+  model: jigsawstack.tts('en-US-female-27'),
+  prompt: 'Hello, world!',
+});
+```
+
+You can also specify specific JigsawStack properties by passing them as an argument to the `tts` function.
+
+```ts
+const speech = await speak({
+  model: jigsawstack.tts('en-US-female-27', {
+    speaker_clone_file_store_key: '...',
+  }),
+  prompt: 'Hello, world!',
+});
+```
+
+### Speech to Text
+
+The JigsawStack provider provides a `stt` function that allows you to create a speech-to-text transcription function using JigsawStack. The function it returns takes the URL of an audio file, so use it with `transcribeWithUrl`.
+
+```ts
+import { transcribeWithUrl } from 'orate';
+import { jigsawstack } from 'orate/jigsawstack';
+
+const text = await transcribeWithUrl({
+  model: jigsawstack.stt(),
+  url: 'https://example.com/audio.mp3',
+});
+```
+
+You can also specify specific JigsawStack properties by passing them as an argument to the `stt` function.
+
+```ts
+const text = await transcribeWithUrl({
+  model: jigsawstack.stt({
+    batch_size: 1,
+  }),
+  url: 'https://example.com/audio.mp3',
+});
+```
diff --git a/website/public/providers/jigsaw-stack.svg b/website/public/providers/jigsaw-stack.svg
-Original file line number
+Diff line change
@@ Expand Up / @@ -77,6 +77,7 @@ export const openai = { @@
           const response = await provider.audio.transcriptions.create({
             model,
             file: audio,
             ...properties,
           });
@@ Expand Down @@