Skip to content

Commit 396db1f

Browse files
committed
google stt plugin
1 parent f282315 commit 396db1f

File tree

12 files changed

+1411
-12
lines changed

12 files changed

+1411
-12
lines changed

plugins/google/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# @livekit/agents-plugin-google

plugins/google/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Google plugin for LiveKit Agents
2+
3+
The Agents Framework is designed for building realtime, programmable
4+
participants that run on servers. Use it to create conversational, multi-modal
5+
voice agents that can see, hear, and understand.
6+
7+
This package contains the Google plugin, which provides speech recognition (speech-to-text) through Google Cloud Speech-to-Text.
8+
Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
9+
information on how to use it, or browse the [API
10+
reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_google.html).
11+
See the [repository](https://github.com/livekit/agents-js) for more information
12+
about the framework as a whole.

plugins/google/api-extractor.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/**
2+
* Config file for API Extractor. For more info, please visit: https://api-extractor.com
3+
*/
4+
{
5+
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
6+
7+
/**
8+
* Optionally specifies another JSON config file that this file extends from. This provides a way for
9+
* standard settings to be shared across multiple projects.
10+
*
11+
* If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
12+
* the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be
13+
* resolved using NodeJS require().
14+
*
15+
* SUPPORTED TOKENS: none
16+
* DEFAULT VALUE: ""
17+
*/
18+
"extends": "../../api-extractor-shared.json",
19+
"mainEntryPointFilePath": "./dist/index.d.ts"
20+
}

plugins/google/package.json

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
{
2+
"name": "@livekit/agents-plugin-google",
3+
"version": "0.1.0",
4+
"description": "Google plugin for LiveKit Agents for Node.js",
5+
"main": "dist/index.js",
6+
"require": "dist/index.cjs",
7+
"types": "dist/index.d.ts",
8+
"exports": {
9+
"import": {
10+
"types": "./dist/index.d.ts",
11+
"default": "./dist/index.js"
12+
},
13+
"require": {
14+
"types": "./dist/index.d.cts",
15+
"default": "./dist/index.cjs"
16+
}
17+
},
18+
"author": "LiveKit",
19+
"type": "module",
20+
"repository": "git@github.com:livekit/agents-js.git",
21+
"license": "Apache-2.0",
22+
"files": [
23+
"dist",
24+
"src",
25+
"README.md"
26+
],
27+
"scripts": {
28+
"build": "tsup --onSuccess \"pnpm build:types\"",
29+
"build:types": "tsc --declaration --emitDeclarationOnly && node ../../scripts/copyDeclarationOutput.js",
30+
"clean": "rm -rf dist",
31+
"clean:build": "pnpm clean && pnpm build",
32+
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
33+
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
34+
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
35+
},
36+
"devDependencies": {
37+
"@livekit/agents": "workspace:^x",
38+
"@livekit/agents-plugin-silero": "workspace:^x",
39+
"@livekit/agents-plugins-test": "workspace:^x",
40+
"@livekit/rtc-node": "^0.13.11",
41+
"@microsoft/api-extractor": "^7.35.0",
42+
"tsup": "^8.3.5",
43+
"typescript": "^5.0.0"
44+
},
45+
"dependencies": {
46+
"@google-cloud/speech": "^7.1.0"
47+
},
48+
"peerDependencies": {
49+
"@livekit/agents": "workspace:^x",
50+
"@livekit/rtc-node": "^0.13.11"
51+
}
52+
}

plugins/google/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// Package entry point: re-exports every public symbol of the STT module.
// NOTE(review): ./models.js is not re-exported from here — confirm whether its
// types are surfaced through ./stt.js or are intentionally internal.
export * from './stt.js';

plugins/google/src/models.ts

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/**
 * Names of the audio encodings this plugin can refer to.
 *
 * This is a string enum: each member's runtime value is exactly its own name,
 * so a member can be passed anywhere the plain string form is expected.
 *
 * NOTE(review): the member list presumably mirrors the `AudioEncoding` values
 * of the Google Cloud Speech API — confirm against the API reference.
 */
export enum AudioEncoding {
  AUDIO_ENCODING_UNSPECIFIED = 'AUDIO_ENCODING_UNSPECIFIED',
  LINEAR16 = 'LINEAR16',
  MULAW = 'MULAW',
  ALAW = 'ALAW',
  AMR = 'AMR',
  AMR_WB = 'AMR_WB',
  FLAC = 'FLAC',
  MP3 = 'MP3',
  OGG_OPUS = 'OGG_OPUS',
  WEBM_OPUS = 'WEBM_OPUS',
  MP4_AAC = 'MP4_AAC',
  M4A_AAC = 'M4A_AAC',
  MOV_AAC = 'MOV_AAC',
}
16+
17+
/**
 * Kinds of speech events referenced by this plugin.
 *
 * This is a string enum: each member's runtime value is exactly its own name.
 *
 * NOTE(review): presumably mirrors the `SpeechEventType` values reported by
 * Google's streaming recognition responses — confirm against the API reference.
 */
export enum SpeechEventType {
  SPEECH_EVENT_TYPE_UNSPECIFIED = 'SPEECH_EVENT_TYPE_UNSPECIFIED',
  END_OF_SINGLE_UTTERANCE = 'END_OF_SINGLE_UTTERANCE',
  SPEECH_ACTIVITY_BEGIN = 'SPEECH_ACTIVITY_BEGIN',
  SPEECH_ACTIVITY_END = 'SPEECH_ACTIVITY_END',
}
23+
24+
// Google Cloud Speech-to-Text API types
25+
/**
 * Credential object with ten required string fields.
 *
 * The snake_case property names are kept as-is rather than camelCased.
 * NOTE(review): the field set looks like a Google service-account key file
 * (the JSON downloaded from the Cloud console) — confirm that is the intended
 * source and that no optional fields (e.g. `universe_domain`) are needed.
 */
export interface GoogleCredentials {
  type: string;
  project_id: string;
  private_key_id: string;
  private_key: string;
  client_email: string;
  client_id: string;
  auth_uri: string;
  token_uri: string;
  auth_provider_x509_cert_url: string;
  client_x509_cert_url: string;
}
37+
38+
/**
 * Language tags (language-region form, e.g. `en-US`) that this plugin lists
 * explicitly.
 *
 * NOTE(review): this is a hand-picked subset, not necessarily every language
 * the service supports — confirm against Google's supported-languages list.
 */
export type SpeechLanguages =
  | 'en-US'
  | 'en-GB'
  | 'en-AU'
  | 'en-CA'
  | 'pl-PL'
  | 'de-DE'
  | 'fr-FR'
  | 'es-ES'
  | 'it-IT'
  | 'pt-BR'
  | 'ru-RU'
  | 'ja-JP'
  | 'ko-KR'
  | 'zh-CN'
  | 'zh-TW'
  | 'ar-SA'
  | 'hi-IN'
  | 'th-TH'
  | 'vi-VN'
  | 'tr-TR';
59+
60+
/**
 * Recognition model identifiers accepted by this plugin's typings.
 *
 * NOTE(review): `latest_medium` and most of the `medical_*` identifiers below
 * (other than `medical_conversation` and `medical_dictation`) could not be
 * matched to Google's documented model list while reviewing — verify each
 * against the Speech-to-Text model documentation before relying on it. The
 * union is left unchanged here so existing callers keep compiling.
 */
export type SpeechModels =
  | 'latest_long'
  | 'latest_short'
  | 'latest_medium'
  | 'command_and_search'
  | 'phone_call'
  | 'video'
  | 'default'
  | 'medical_conversation'
  | 'medical_dictation'
  | 'medical_question_and_answer'
  | 'medical_report'
  | 'medical_symptom'
  | 'medical_test'
  | 'medical_treatment'
  | 'medical_emergency'
  | 'medical_consultation'
  | 'medical_instruction'
  | 'medical_procedure'
  | 'medical_medication'
  | 'medical_diagnosis'
  | 'medical_condition';
82+
83+
/**
 * A language tag: one of the listed {@link SpeechLanguages} or any other string.
 *
 * Written as `string & Record<never, never>` rather than bare `string` because
 * `SpeechLanguages | string` is simplified by the compiler to just `string`,
 * which discards the literal members and with them editor completion. The
 * intersection still accepts every string value, so assignability is unchanged.
 */
export type LanguageType = SpeechLanguages | (string & Record<never, never>);

/** Either a single language tag or a list of language tags. */
export type LanguageCode = LanguageType | LanguageType[];

plugins/google/src/stt.test.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import { initializeLogger } from '@livekit/agents';
2+
import { VAD } from '@livekit/agents-plugin-silero';
3+
import { stt } from '@livekit/agents-plugins-test';
4+
import { describe } from 'vitest';
5+
import { STT } from './stt.js';
6+
7+
// Runs the shared STT test helper from `@livekit/agents-plugins-test` against
// this plugin's `STT`, constructed with no arguments, using a Silero VAD.
describe('Google', async () => {
  // Set up the agents logger before the STT instance is created.
  initializeLogger({ pretty: false });
  // NOTE(review): `nonStreaming: false` presumably turns off the non-streaming
  // part of the shared suite — confirm against the plugins-test helper.
  await stt(new STT(), await VAD.load(), { nonStreaming: false });
});

0 commit comments

Comments
 (0)