Skip to content

Commit

Permalink
perf: tts buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
c121914yu committed Nov 16, 2023
1 parent db14c06 commit 7ca6e3f
Show file tree
Hide file tree
Showing 16 changed files with 125 additions and 84 deletions.
4 changes: 3 additions & 1 deletion packages/global/core/ai/model.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,7 @@ export type AudioSpeechModelType = {
model: string;
name: string;
price: number;
voices: { label: string; value: string }[];
baseUrl?: string;
key?: string;
voices: { label: string; value: string; bufferId: string }[];
};
12 changes: 6 additions & 6 deletions packages/global/core/ai/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,12 @@ export const defaultAudioSpeechModels: AudioSpeechModelType[] = [
name: 'OpenAI TTS1',
price: 0,
voices: [
{ label: 'Alloy', value: 'Alloy' },
{ label: 'Echo', value: 'Echo' },
{ label: 'Fable', value: 'Fable' },
{ label: 'Onyx', value: 'Onyx' },
{ label: 'Nova', value: 'Nova' },
{ label: 'Shimmer', value: 'Shimmer' }
{ label: 'Alloy', value: 'Alloy', bufferId: 'openai-Alloy' },
{ label: 'Echo', value: 'Echo', bufferId: 'openai-Echo' },
{ label: 'Fable', value: 'Fable', bufferId: 'openai-Fable' },
{ label: 'Onyx', value: 'Onyx', bufferId: 'openai-Onyx' },
{ label: 'Nova', value: 'Nova', bufferId: 'openai-Nova' },
{ label: 'Shimmer', value: 'Shimmer', bufferId: 'openai-Shimmer' }
]
}
];
17 changes: 0 additions & 17 deletions packages/global/core/ai/speech/constant.ts

This file was deleted.

1 change: 0 additions & 1 deletion packages/global/core/app/type.d.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { ModuleItemType } from '../module/type';
import { AppTypeEnum } from './constants';
import { PermissionTypeEnum } from '../../support/permission/constant';
import { Text2SpeechVoiceEnum } from '../ai/speech/constant';

export interface AppSchema {
_id: string;
Expand Down
1 change: 0 additions & 1 deletion packages/global/core/chat/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ export type ChatItemSchema = {
userFeedback?: string;
adminFeedback?: AdminFbkType;
[TaskResponseKeyEnum.responseData]?: ChatHistoryItemResType[];
tts?: Buffer;
};

export type AdminFbkType = {
Expand Down
35 changes: 35 additions & 0 deletions packages/service/common/buffer/tts/schema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { TTSBufferSchemaType } from './type.d';

// Name under which the TTS buffer model is registered with mongoose and looked up in `models`.
export const collectionName = 'ttsbuffers';

/**
 * Mongoose schema for one cached text-to-speech result.
 * A record pairs a voice's `bufferId` and the input `text` with the generated audio `buffer`.
 */
const TTSBufferSchema = new Schema({
  // Identifier of the voice the audio was generated with.
  bufferId: { type: String, required: true },
  // Input text the audio was generated from.
  text: { type: String, required: true },
  // Generated audio bytes.
  buffer: { type: Buffer, required: true },
  // Creation timestamp; defaults to the moment the document is built.
  createTime: { type: Date, default: () => new Date() }
});

try {
  // Lookup index on the voice identifier used when querying cached audio.
  TTSBufferSchema.index({ bufferId: 1 });

  // TTL index on createTime: entries expire 24 hours after creation.
  const ttlSeconds = 24 * 60 * 60;
  TTSBufferSchema.index({ createTime: 1 }, { expireAfterSeconds: ttlSeconds });
} catch (indexError) {
  // Index declaration is best-effort: report the problem and keep loading the module.
  console.log(indexError);
}

/**
 * TTS buffer model.
 * Reuses the model already present in `models` under `collectionName` when there is one;
 * otherwise compiles a new model from `TTSBufferSchema`.
 */
export const MongoTTSBuffer: Model<TTSBufferSchemaType> =
  models[collectionName] ?? model(collectionName, TTSBufferSchema);
5 changes: 5 additions & 0 deletions packages/service/common/buffer/tts/type.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/**
 * Shape of one cached text-to-speech record, mirroring the fields of the TTS buffer schema.
 */
export type TTSBufferSchemaType = {
  /** Identifier of the voice the audio was generated with. */
  bufferId: string;
  /** Input text the audio was generated from. */
  text: string;
  /** Generated audio bytes. */
  buffer: Buffer;
  /**
   * Creation timestamp. The schema declares this field with a default, so callers
   * creating a record may omit it; it is also absent when a query projects it out.
   */
  createTime?: Date;
};
17 changes: 10 additions & 7 deletions packages/service/core/ai/audio/speech.ts
Original file line number Diff line number Diff line change
@@ -1,28 +1,31 @@
import type { NextApiResponse } from 'next';
import { getAIApi } from '../config';
import { defaultAudioSpeechModels } from '../../../../global/core/ai/model';
import { Text2SpeechVoiceEnum } from '@fastgpt/global/core/ai/speech/constant';
import { UserModelSchema } from '@fastgpt/global/support/user/type';

export async function text2Speech({
res,
onSuccess,
onError,
model = defaultAudioSpeechModels[0].model,
voice = Text2SpeechVoiceEnum.alloy,
input,
speed = 1
model = defaultAudioSpeechModels[0].model,
voice,
speed = 1,
props
}: {
res: NextApiResponse;
onSuccess: (e: { model: string; buffer: Buffer }) => void;
onError: (e: any) => void;
model?: string;
voice?: `${Text2SpeechVoiceEnum}`;
input: string;
model: string;
voice: string;
speed?: number;
props?: UserModelSchema['openaiAccount'];
}) {
const ai = getAIApi();
const ai = getAIApi(props);
const response = await ai.audio.speech.create({
model,
// @ts-ignore
voice,
input,
response_format: 'mp3',
Expand Down
3 changes: 0 additions & 3 deletions packages/service/core/chat/chatItemSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,6 @@ const ChatItemSchema = new Schema({
[TaskResponseKeyEnum.responseData]: {
type: Array,
default: []
},
tts: {
type: Buffer
}
});

Expand Down
14 changes: 8 additions & 6 deletions projects/app/data/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,15 @@
"model": "tts-1",
"name": "OpenAI TTS1",
"price": 0,
"baseUrl": "https://api.openai.com/v1",
"key": "",
"voices": [
{ "label": "Alloy", "value": "Alloy" },
{ "label": "Echo", "value": "Echo" },
{ "label": "Fable", "value": "Fable" },
{ "label": "Onyx", "value": "Onyx" },
{ "label": "Nova", "value": "Nova" },
{ "label": "Shimmer", "value": "Shimmer" }
{ "label": "Alloy", "value": "alloy", "bufferId": "openai-Alloy" },
{ "label": "Echo", "value": "echo", "bufferId": "openai-Echo" },
{ "label": "Fable", "value": "fable", "bufferId": "openai-Fable" },
{ "label": "Onyx", "value": "onyx", "bufferId": "openai-Onyx" },
{ "label": "Nova", "value": "nova", "bufferId": "openai-Nova" },
{ "label": "Shimmer", "value": "shimmer", "bufferId": "openai-Shimmer" }
]
}
]
Expand Down
1 change: 0 additions & 1 deletion projects/app/src/global/core/chat/api.d.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import type { AppTTSConfigType } from '@/types/app';

/**
 * Request body accepted by the chat speech (text-to-speech) API handler.
 */
export type GetChatSpeechProps = {
// NOTE(review): the updated handler destructures only `ttsConfig` and `input` from the
// request body — confirm whether `chatItemId` is still needed here.
chatItemId?: string;
// TTS settings; the handler rejects the request when `model` or `voice` is missing.
ttsConfig: AppTTSConfigType;
// Text to convert to speech.
input: string;
};
55 changes: 35 additions & 20 deletions projects/app/src/pages/api/core/chat/item/getSpeech.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { GetChatSpeechProps } from '@/global/core/chat/api.d';
import { text2Speech } from '@fastgpt/service/core/ai/audio/speech';
import { pushAudioSpeechBill } from '@/service/support/wallet/bill/push';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { authType2BillSource } from '@/service/support/wallet/bill/utils';
import { getAudioSpeechModel } from '@/service/core/ai/model';
import { MongoTTSBuffer } from '@fastgpt/service/common/buffer/tts/schema';

/*
1. get tts from chatItem store
Expand All @@ -18,31 +19,43 @@ import { authType2BillSource } from '@/service/support/wallet/bill/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
await connectToDatabase();
const { chatItemId, ttsConfig, input } = req.body as GetChatSpeechProps;
const { ttsConfig, input } = req.body as GetChatSpeechProps;

if (!ttsConfig.model || !ttsConfig.voice) {
throw new Error('model or voice not found');
}

const { teamId, tmbId, authType } = await authCert({ req, authToken: true });

const chatItem = await (async () => {
if (!chatItemId) return null;
return await MongoChatItem.findOne(
{
dataId: chatItemId
},
'tts'
);
})();
const ttsModel = getAudioSpeechModel(ttsConfig.model);
const voiceData = ttsModel.voices.find((item) => item.value === ttsConfig.voice);

if (!voiceData) {
throw new Error('voice not found');
}

const ttsBuffer = await MongoTTSBuffer.findOne(
{
bufferId: voiceData.bufferId,
text: input
},
'buffer'
);

if (chatItem?.tts) {
return jsonRes(res, {
data: chatItem.tts
});
if (ttsBuffer?.buffer) {
return res.end(new Uint8Array(ttsBuffer.buffer.buffer));
}

await text2Speech({
res,
input,
model: ttsConfig.model,
voice: ttsConfig.voice,
input,
res,
props: {
// temp code
baseUrl: ttsModel.baseUrl || '',
key: ttsModel.key || ''
},
onSuccess: async ({ model, buffer }) => {
try {
pushAudioSpeechBill({
Expand All @@ -53,9 +66,11 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
source: authType2BillSource({ authType })
});

if (!chatItem) return;
chatItem.tts = buffer;
await chatItem.save();
await MongoTTSBuffer.create({
bufferId: voiceData.bufferId,
text: input,
buffer
});
} catch (error) {}
},
onError: (err) => {
Expand Down
6 changes: 5 additions & 1 deletion projects/app/src/pages/api/system/getInitData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
cqModels: global.cqModels,
extractModels: global.extractModels,
vectorModels: global.vectorModels,
audioSpeechModels: global.audioSpeechModels,
audioSpeechModels: global.audioSpeechModels.map((item) => ({
...item,
baseUrl: undefined,
key: undefined
})),
priceMd: global.priceMd,
systemVersion: global.systemVersion || '0.0.0'
}
Expand Down
19 changes: 10 additions & 9 deletions projects/app/src/pages/app/detail/components/TTSSelect.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ import React, { useCallback, useMemo } from 'react';
import { useTranslation } from 'next-i18next';
import MySelect from '@/components/Select';
import { TTSTypeEnum } from '@/constants/app';
import { Text2SpeechVoiceEnum, openaiTTSModel } from '@fastgpt/global/core/ai/speech/constant';
import { AppTTSConfigType } from '@/types/app';
import { useAudioPlay } from '@/web/common/utils/voice';
import { useLoading } from '@/web/common/hooks/useLoading';
import { audioSpeechModels } from '@/web/common/system/staticData';

const TTSSelect = ({
value,
Expand Down Expand Up @@ -37,10 +37,16 @@ const TTSSelect = ({
if (e === TTSTypeEnum.none || e === TTSTypeEnum.web) {
onChange({ type: e as `${TTSTypeEnum}` });
} else {
const audioModel = audioSpeechModels.find((item) =>
item.voices.find((voice) => voice.value === e)
);
if (!audioModel) {
return;
}
onChange({
type: TTSTypeEnum.model,
model: openaiTTSModel,
voice: e as `${Text2SpeechVoiceEnum}`,
model: audioModel.model,
voice: e,
speed: 1
});
}
Expand Down Expand Up @@ -77,12 +83,7 @@ const TTSSelect = ({
list={[
{ label: t('core.app.tts.Close'), value: TTSTypeEnum.none },
{ label: t('core.app.tts.Web'), value: TTSTypeEnum.web },
{ label: 'Alloy', value: Text2SpeechVoiceEnum.alloy },
{ label: 'Echo', value: Text2SpeechVoiceEnum.echo },
{ label: 'Fable', value: Text2SpeechVoiceEnum.fable },
{ label: 'Onyx', value: Text2SpeechVoiceEnum.onyx },
{ label: 'Nova', value: Text2SpeechVoiceEnum.nova },
{ label: 'Shimmer', value: Text2SpeechVoiceEnum.shimmer }
...audioSpeechModels.map((item) => item.voices).flat()
]}
onchange={onclickChange}
/>
Expand Down
3 changes: 1 addition & 2 deletions projects/app/src/types/app.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import type { FlowModuleTemplateType, ModuleItemType } from '@fastgpt/global/cor
import type { ChatSchema } from '@fastgpt/global/core/chat/type';
import type { AppSchema } from '@fastgpt/global/core/app/type';
import { ChatModelType } from '@/constants/model';
import { Text2SpeechVoiceEnum } from '@fastgpt/global/core/ai/speech/constant';

export interface ShareAppItem {
_id: string;
Expand All @@ -40,7 +39,7 @@ export type VariableItemType = {
export type AppTTSConfigType = {
type: 'none' | 'web' | 'model';
model?: string;
voice?: `${Text2SpeechVoiceEnum}`;
voice?: string;
speed?: number;
};

Expand Down
16 changes: 7 additions & 9 deletions projects/app/src/web/common/utils/voice.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,23 +60,21 @@ export const useAudioPlay = (props?: { ttsConfig?: AppTTSConfigType }) => {
});
setAudioLoading(false);

if (response.headers.get('Content-Type') === 'application/json') {
const { data } = (await response.json()) as { data: Buffer };
console.log(data);

playAudioBuffer({ audio, buffer: data });
return resolve({ buffer: data });
}

if (!response.body || !response.ok) {
throw new Error('Speech error');
const data = await response.json();
toast({
status: 'error',
title: getErrText(data, t('core.chat.Audio Speech Error'))
});
return reject(data);
}

const audioBuffer = await readAudioStream({
audio,
stream: response.body,
contentType: 'audio/mpeg'
});

resolve({
buffer: audioBuffer
});
Expand Down

0 comments on commit 7ca6e3f

Please sign in to comment.