Skip to content

server : POC OAI-compat TTS using OuteTTS #11070

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2215,8 +2215,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.hf_file = "OuteTTS-0.2-500M-Q8_0.gguf";
params.vocoder.hf_repo = "ggml-org/WavTokenizer";
params.vocoder.hf_file = "WavTokenizer-Large-75-F16.gguf";
params.ctx_shift = false; // for better results
}
).set_examples({LLAMA_EXAMPLE_TTS}));
).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));

return ctx_arg;
}
1 change: 1 addition & 0 deletions examples/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ set(TARGET_SRCS
server.cpp
utils.hpp
httplib.h
../tts/tts-impl.cpp
)
set(PUBLIC_ASSETS
index.html.gz
Expand Down
132 changes: 132 additions & 0 deletions examples/server/public_tts/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>llama.cpp TTS</title>
<style>
body {
font-family: monospace;
margin: 2em;
}
</style>
<!-- <script src=" https://cdn.jsdelivr.net/npm/lamejs@1.2.1/lame.min.js"></script> -->
</head>
<body>
<h1>llama.cpp TTS</h1>

Input text:<br/>
<textarea id="input" rows="4" cols="50">Hello world</textarea><br/>
<button id="btn_speak" onclick="speak()">Speak</button><br/>
<br/>
<p id="status">Status: ready</p><br/>
<p id="output"></p>
<a id="download"></a>

<script>
// Cached references to the page elements, looked up once by their ids.
const input_el = document.getElementById('input');       // <textarea> holding the text to speak
const output_el = document.getElementById('output');     // <p> that receives the timings report
const status_el = document.getElementById('status');     // <p> showing the current status line
const download_el = document.getElementById('download'); // <a> used as the download link
const btn_speak_el = document.getElementById('btn_speak'); // the "Speak" button

// True while speak() is running; speak() returns immediately when it is set.
let working = false;

/**
 * Sends the trimmed text of the input box to `POST /v1/audio/speech`
 * (requesting WAV output), then exposes the returned audio as a download
 * link, shows the timing headers and starts playback.
 *
 * Calls made while a previous call is still running are ignored. The input
 * box and the button are disabled for the duration of the call and are
 * always re-enabled afterwards, whether the call succeeded or failed.
 *
 * Any failure (network error, non-200 response, playback refused by the
 * browser) is logged, reported through `alert` and resets the status line.
 *
 * @returns {Promise<void>}
 */
async function speak() {
    if (working) {
        return;
    }

    working = true;
    input_el.disabled = true;
    btn_speak_el.disabled = true;
    status_el.textContent = 'Status: generating...';
    download_el.textContent = '';

    const input = input_el.value.trim();

    try {
        const res = await fetch('/v1/audio/speech', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                input,
                response_format: 'wav',
            }),
        });

        if (res.status !== 200) {
            const text = await res.text();
            throw new Error(`Failed to generate speech: ${text}`);
        }

        const blob = await res.blob();
        const url = URL.createObjectURL(blob);

        // Publish the download link and the timings before attempting
        // playback, so they stay available even if the browser refuses to play.
        download_el.href = url;
        download_el.innerText = 'Download';
        download_el.download = getFileNameWAV(input);
        echoTimings(res.headers, input);

        // const buffer = await blob.arrayBuffer();
        // wavToMp3(new Int16Array(buffer));

        const audio = new Audio(url);
        const markReady = () => {
            status_el.textContent = 'Status: ready';
        };
        // Listeners are attached before play() so neither event can be missed.
        audio.addEventListener('ended', markReady);
        audio.addEventListener('error', markReady);

        status_el.textContent = 'Status: playing...';
        // play() returns a promise that rejects when playback cannot start;
        // awaiting it routes that failure into the catch block below instead
        // of leaving an unhandled rejection and a stuck "playing..." status.
        await audio.play();
    } catch (e) {
        console.error(e);
        alert(e.message);
        status_el.textContent = 'Status: ready';
    } finally {
        // Always hand control back to the user, even if error reporting throws.
        working = false;
        input_el.disabled = false;
        btn_speak_el.disabled = false;
    }
}

/**
 * Renders the timing information carried by the response headers into the
 * output element, together with the (HTML-escaped) input text.
 *
 * Reads the `X-timings-ttc`, `X-timings-voc` and `X-timings-spec` headers,
 * each parsed as JSON. Every entry of the TTC object is listed: keys ending
 * in `_ms` are shown with two decimals and a " ms" suffix, all other values
 * are shown truncated to an integer. If any header is missing or malformed,
 * the error is logged and a fallback message is shown instead.
 *
 * @param {{ get(name: string): (string|null) }} headers - Header accessor,
 *        e.g. the `headers` of a fetch `Response`.
 * @param {string} input_txt - The text that was sent for synthesis.
 * @returns {void}
 */
function echoTimings(headers, input_txt) {
    try {
        const timingsTTC = JSON.parse(headers.get('X-timings-ttc'));
        const timingsVoc = JSON.parse(headers.get('X-timings-voc'));
        const timingsSpec = JSON.parse(headers.get('X-timings-spec'));

        const ttcItems = Object.entries(timingsTTC).map(([key, value]) => {
            // Math.trunc drops the fraction without the string round-trip of
            // parseInt, which mis-handles numbers printed in exponent form.
            const shown = key.endsWith('_ms') ? `${value.toFixed(2)} ms` : Math.trunc(value);
            // Keys come from a response header, so escape them like any other
            // external text before inserting them as HTML.
            return `<li>${escapeHtml(key)}: ${shown}</li>`;
        }).join('');

        output_el.innerHTML = `
<b>Input text:</b> ${escapeHtml(input_txt)}<br/>
<b>Timings:</b><br/>
<b>TTC:</b>
<ul>
${ttcItems}
</ul>
<b>Voc:</b> ${timingsVoc.t_voc_ms.toFixed(2)} ms<br/>
<b>Spec:</b> ${timingsSpec.t_spec_ms.toFixed(2)} ms
`;
    } catch (e) {
        console.error(e);
        output_el.innerHTML = 'No timings data is available.';
    }
}

/**
 * Escapes the five HTML-significant characters (& < > " ') so that the
 * given text can be inserted into markup as plain text.
 *
 * @param {string} unsafe - Text to escape.
 * @returns {string} The text with each special character replaced by its entity.
 */
function escapeHtml(unsafe) {
    const entityFor = {
        '&': "&amp;",
        '<': "&lt;",
        '>': "&gt;",
        '"': "&quot;",
        "'": "&#039;",
    };
    // One pass over the string: every special character maps to its entity.
    return unsafe.replace(/[&<>"']/g, (ch) => entityFor[ch]);
}

/**
 * Derives a download file name from the spoken text: every character that
 * is not an ASCII letter or digit becomes "_", the result is lower-cased,
 * cut to at most 32 characters and given a ".wav" extension.
 *
 * @param {string} input - The text that was synthesized.
 * @returns {string} A file name ending in ".wav".
 */
function getFileNameWAV(input) {
    const sanitized = input.replace(/[^a-z0-9]/gi, '_');
    const stem = sanitized.toLowerCase().slice(0, 32);
    return `${stem}.wav`;
}

/**
 * Placeholder for WAV-to-MP3 conversion. The body is empty, so calling it
 * does nothing and returns undefined.
 *
 * @param {*} wavData - Audio data to convert (currently unused).
 */
function wavToMp3(wavData) {
// TODO: implement this using lamejs
}
</script>
</body>
</html>
Loading
Loading