Skip to content
This repository was archived by the owner on Jul 1, 2021. It is now read-only.

修正 html中的ajax请求地址,增加自定义音频输出接口 #8

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.DS_Store
.wav
.wav
node_modules
venv
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Chinese Text-to-Speech(TTS)

汉字 => ["han4", "zi4"] => .wav audio

(environment: python 3)

*Read this page in [简体中文](https://github.com/junzew/HanTTS/blob/master/README.zh.md)*
## Libraries Used

Expand All @@ -28,7 +30,6 @@ cd HanTTS
pip install --user -r requires.txt
```

Download [`syllables.zip`](https://sourceforge.net/projects/hantts/files/?source=navbar) from SourceForge, and decompress under the directory `HanTTS`.

* Either run locally: `python main.py`
* Or through web interface:
Expand All @@ -39,6 +40,26 @@ Download [`syllables.zip`](https://sourceforge.net/projects/hantts/files/?source
```
Navigate to `localhost:3000` in a browser

## Advanced usage
Set the audio parameters yourself through the request URL:
`http://127.0.0.1:3000/pythonAlias/audioType/decodeUTF8/compressed/speed/text`

For example
```
http://127.0.0.1:3000/python3/wav/false/true/1/测试
```

| params | accept |note|
| ---- | ---- |----|
| pythonAlias | python, python3 |use `python3` if that is the command that starts Python on your machine|
| audioType | wav, mp3 (others not tested) | choose the output file type|
|decodeUTF8|true, false|whether the text needs to be UTF-8 decoded|
|compressed|true, false| output a smaller audio file|
|speed|a number, e.g. -0.5, 1, 3 |floats and negative values are accepted (the sample rate is scaled by 2^speed); use 0 to leave the speed unchanged|
|text|Chinese text|the text to synthesize|



## Use your own voice
- Record [five tones](https://en.wikipedia.org/wiki/Pinyin#Tones) of each [pinyin](https://en.wikipedia.org/wiki/Pinyin_table) listed in `mapping.json`
- Group them by the first letter (a,b,c,d, etc.), and save under folder `./recording` as `{letter}.wav`
Expand Down
21 changes: 19 additions & 2 deletions README.zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

汉字 => 拼音 ["han4", "zi4"] => .wav音频

(environment: python 3)
## 使用的库

#### 汉字转拼音
Expand All @@ -27,15 +28,31 @@ cd HanTTS
pip install --user -r requires.txt
```

从SourceForge下载语音库[`syllables.zip`](https://sourceforge.net/projects/hantts/files/?source=navbar),并解压到`HanTTS`目录下

- 本地执行 `python main.py`
- 或 Web
- `cd` 到 `server` 文件夹下
- `npm install`
- `node app.js`
- 浏览器里打开`localhost:3000`

## 进阶使用
自己设置音频参数
`http://127.0.0.1:3000/pythonAlias/audioType/decodeUTF8/compressed/speed/text`

例如
```
http://127.0.0.1:3000/python3/wav/false/true/1/测试
```

| params | accept |note|
| ---- | ---- |----|
| pythonAlias | python, python3 |如果你的设备python别名为python3,请填写python3|
| audioType | wav, mp3 (其他没有测试) | 音频输出格式|
|decodeUTF8|true, false|文字是否需要进行 UTF-8 解码|
|compressed|true, false|是否输出一个压缩文件|
|speed|数字, 比如 -0.5, 1, 3 |( 可以使用float或者负数 ), 如果不想改变速度,填写 0|
|text|中文|TTS的内容|

## 录制新的语音库
- 按阴平、阳平、上声、去声、轻声的顺序录下 mapping.json 里每一个音节的五个声调
- 按开头字母(letter)分组, 将文件存在 ./recording/{letter}.wav下
Expand Down
111 changes: 86 additions & 25 deletions app.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,101 @@ const express = require('express')
var bodyParser = require('body-parser');
const spawn = require('child_process').spawn;
var fs = require('fs');
const path = require('path');
const utf8 = require('utf8');

const app = express()
app.use(express.static('public'))
app.use('/audio', express.static('audio'))
app.use(bodyParser.json()); // for parsing application/json

// Port the server binds to: the PORT environment variable when set, otherwise 3000.
const port = process.env.PORT || 3000;

/**
 * Start the HTTP server and, once it is listening, make sure the ./audio
 * output directory exists (the synthesize routes pass `--dst ./audio/` to
 * main.py and later serve the generated file from there).
 */
app.listen(port, function () {
  // Report the port actually in use instead of a hard-coded 3000.
  console.log(`app listening on port ${port}`);

  const dir = './audio';
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir);
    console.log("created directory ./audio");
  }
});

app.post("/", function(req, res) {
var text = req.body.text;
console.log(text)
args = ["./main.py", 'synthesize', '--text', text, '--src', "./syllables/", '--dst', "./audio/"]
var process = spawn('python', args);
var output = "";
process.stdout.on('data', function(data){ output += data });
process.stderr.on('data', function(data){ console.error(`stderr: ${data}`); });
process.on("close", function(code) {
if (code !== 0) {
return res.send(`child process exited with code ${code}`)
}
console.log(output)
console.log("sending response")

res.send('<audio src="./audio/generated.wav" controls="true"></audio>')
});
/**
 * POST / — synthesize the JSON body's `text` into ./audio/generated.wav by
 * running main.py in a child Python process, then answer with an <audio> tag
 * that points at the generated file.
 *
 * Body fields:
 *   text          {string} required, the text to synthesize
 *   pythonChoose  {string} optional; 'python3' runs `python3`, anything else `python`
 *
 * Responses:
 *   400  when `text` is missing, empty or not a string
 *   500  when the interpreter could not be started
 *   otherwise the original bodies: the exit-code message on a non-zero exit,
 *   or the <audio> snippet on success.
 */
app.post("/", function (req, res) {
  const body = req.body || {};
  const text = body.text;
  if (typeof text !== 'string' || text.length === 0) {
    return res.status(400).send('missing or invalid "text" in request body');
  }
  console.log(text);

  const args = [
    "./main.py",
    'synthesize',
    // `--text=value` form so that text starting with '-' is not parsed as an option.
    `--text=${text}`,
    '--src', "./syllables/",
    '--dst', "./audio/",
    '--type', 'wav',
    // main.py calls float() on the speed value unless it is exactly '0';
    // omitting --speed leaves it as None there and the synthesis crashes.
    '--speed', '0',
  ];
  const pythonAlias = body.pythonChoose === 'python3' ? 'python3' : 'python';

  // Named `child` so that Node's global `process` is not shadowed.
  const child = spawn(pythonAlias, args);
  let output = "";
  // 'error' can be followed by 'close'; make sure only one of them responds.
  let responded = false;

  child.stdout.on('data', function (data) {
    output += data;
  });
  child.stderr.on('data', function (data) {
    console.error(`stderr: ${data}`);
  });
  // Without this listener a missing interpreter raises an unhandled 'error'
  // event, which terminates the whole server.
  child.on('error', function (err) {
    console.error(`failed to start ${pythonAlias}: ${err.message}`);
    if (responded) {
      return;
    }
    responded = true;
    res.status(500).send(`failed to start ${pythonAlias}: ${err.message}`);
  });
  child.on("close", function (code) {
    if (responded) {
      return;
    }
    responded = true;
    if (code !== 0) {
      return res.send(`child process exited with code ${code}`);
    }
    console.log(output);
    console.log("sending response");

    res.send('<audio src="./audio/generated.wav" controls="true"></audio>');
  });
});

// GET /file — send ./audio/generated.wav to the client as a download.
app.get("/file", (req, res) => {
  const generatedWav = "./audio/generated.wav";
  res.download(generatedWav);
});

app.get("/file", function(req, res) {
res.download("./audio/generated.wav")
/**
 * GET /:pythonVersion/:type/:decodeUTF8/:compressed/:speed/:text
 *
 * Synthesize `text` with caller-chosen audio parameters by running main.py in
 * a child Python process, then send the generated file itself as the response.
 *
 * Path parameters:
 *   pythonVersion  'python3' runs `python3`; any other value runs `python`
 *   type           output format / file extension (letters and digits only)
 *   decodeUTF8     'true' to run the text through utf8.decode() first
 *   compressed     forwarded to main.py as --compressed
 *   speed          forwarded to main.py as --speed
 *   text           the text to synthesize
 *
 * Responses:
 *   400  when `type` is not alphanumeric or the text cannot be UTF-8 decoded
 *   500  when the interpreter could not be started
 *   otherwise the original behaviour: the exit-code message on a non-zero
 *   exit, or the generated audio file on success.
 */
app.get("/:pythonVersion/:type/:decodeUTF8/:compressed/:speed/:text", function (req, res) {
  const audioType = req.params.type;
  // `type` ends up in the path handed to res.sendFile, and Express decodes
  // %2F inside a parameter, so restrict it to a plain extension.
  if (!/^[A-Za-z0-9]+$/.test(audioType)) {
    return res.status(400).send('invalid audio type');
  }

  let text = req.params.text;
  if (req.params.decodeUTF8 === 'true') {
    try {
      text = utf8.decode(text);
    } catch (err) {
      return res.status(400).send(`text could not be UTF-8 decoded: ${err.message}`);
    }
  }

  // User-supplied values use the `--opt=value` form so that a value starting
  // with '-' cannot be mistaken for another command-line option.
  const args = [
    "./main.py",
    'synthesize',
    `--text=${text}`,
    '--src', "./syllables/",
    '--dst', "./audio/",
    `--type=${audioType}`,
    `--compressed=${req.params.compressed}`,
    `--speed=${req.params.speed}`,
  ];
  const pythonAlias = req.params.pythonVersion === "python3" ? 'python3' : 'python';

  // Named `child` so that Node's global `process` is not shadowed.
  const child = spawn(pythonAlias, args);
  let output = "";
  // 'error' can be followed by 'close'; make sure only one of them responds.
  let responded = false;

  child.stdout.on('data', function (data) {
    output += data;
  });
  child.stderr.on('data', function (data) {
    console.error(`stderr: ${data}`);
  });
  // Without this listener a missing interpreter raises an unhandled 'error'
  // event, which terminates the whole server.
  child.on('error', function (err) {
    console.error(`failed to start ${pythonAlias}: ${err.message}`);
    if (responded) {
      return;
    }
    responded = true;
    res.status(500).send(`failed to start ${pythonAlias}: ${err.message}`);
  });
  child.on("close", function (code) {
    if (responded) {
      return;
    }
    responded = true;
    if (code !== 0) {
      return res.send(`child process exited with code ${code}`);
    }
    console.log(output);
    console.log("sending response");

    res.sendFile(path.join(__dirname, "./audio/generated." + audioType));
  });
});
74 changes: 49 additions & 25 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
import argparse

# for demo only, please replace with your own API key
Turing_API_key = "64c88489ad7f432591d702ec1334dedc"
Turing_API_key = "64c88489ad7f432591d702ec1334dedc"
Turing_API_address = "http://www.tuling123.com/openapi/api"

class TextToSpeech:

class TextToSpeech:
CHUNK = 1024
punctuation = [',', '。','?','!','“','”',';',':','(',")",":",";",",",".","?","!","\"","\'","(",")"]
punctuation = [',', '。', '?', '!', '“', '”', ';', ':', '(', ")", ":", ";", ",", ".", "?", "!", "\"", "\'", "(", ")"]

def __init__(self):
pass
Expand All @@ -34,7 +34,7 @@ def speak(self, text):
syllables = lazy_pinyin(text, style=pypinyin.TONE3)
print(syllables)
delay = 0

def preprocess(syllables):
temp = []
for syllable in syllables:
Expand All @@ -51,26 +51,26 @@ def preprocess(syllables):

syllables = preprocess(syllables)
for syllable in syllables:
path = "syllables/"+syllable+".wav"
path = "syllables/" + syllable + ".wav"
_thread.start_new_thread(TextToSpeech._play_audio, (path, delay))
delay += 0.355

def synthesize(self, text, src, dst):
def synthesize(self, text, src, dst, audio_type, compressed, speed):
"""
Synthesize .wav from text
src is the folder that contains all syllables .wav files
dst is the destination folder to save the synthesized file
"""
print("Synthesizing ...")
delay = 0
increment = 355 # milliseconds
pause = 500 # pause for punctuation
increment = 355 # milliseconds
pause = 500 # pause for punctuation
syllables = lazy_pinyin(text, style=pypinyin.TONE3)

# initialize to be complete silence, each character takes up ~500ms
result = AudioSegment.silent(duration=500*len(text))
result = AudioSegment.silent(duration=500 * len(text))
for syllable in syllables:
path = src+syllable+".wav"
path = src + syllable + ".wav"
sound_file = Path(path)
# insert 500 ms silence for punctuation marks
if syllable in TextToSpeech.punctuation:
Expand All @@ -88,8 +88,19 @@ def synthesize(self, text, src, dst):
directory = dst
if not os.path.exists(directory):
os.makedirs(directory)
if speed != 0:
print("speed:" + str(2 ** float(speed)))
new_sample_rate = int(result.frame_rate * (2.0 ** float(speed)))
result = result._spawn(result.raw_data, overrides={'frame_rate': new_sample_rate})

if compressed is True:
print("compressed")
result.export(directory + "generated." + audio_type, format=audio_type,
parameters=["-ac", "1", "-ar", "8000"])
else:
print("not compressed")
result.export(directory + "generated." + audio_type, format=audio_type)

result.export(directory+"generated.wav", format="wav")
print("Exported.")

def _play_audio(path, delay):
Expand All @@ -101,13 +112,13 @@ def _play_audio(path, delay):
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True)

data = wf.readframes(TextToSpeech.CHUNK)

while data:
stream.write(data)
data = wf.readframes(TextToSpeech.CHUNK)

stream.stop_stream()
stream.close()

Expand All @@ -116,34 +127,39 @@ def _play_audio(path, delay):
except:
pass


def start_chatting(key, location):
print("你好!")
key = Turing_API_key if key is None else key
location = "北京市中关村" if location is None else location
while True:
sentence = input('输入中文:')
r = requests.post(
Turing_API_address,
json = {
"key": key,
"info": sentence,
"loc": location,
"userid":"1"
Turing_API_address,
json={
"key": key,
"info": sentence,
"loc": location,
"userid": "1"
})
response = r.json()["text"]
print(response)
tts.speak(response)


if __name__ == '__main__':
tts = TextToSpeech()

parser = argparse.ArgumentParser(description="HanTTS: Chinese Text-to-Speech program")
subparsers = parser.add_subparsers(title="subcommands", help='optional subcommands', dest='cmd')

synthesize_parser = subparsers.add_parser('synthesize', help='synthesize audio from text')
synthesize_parser.add_argument('--text', help='the text to convert to speech', dest='text')
synthesize_parser.add_argument('--src', help='source directory of audio library', dest='src')
synthesize_parser.add_argument('--dst', help='destination directory for generated .wav file', dest='dst')
synthesize_parser.add_argument('--type', help='choose the type of generated file, like wav or mp3', dest='type')
synthesize_parser.add_argument('--compressed', help='output compressed audio file', dest='compressed')
synthesize_parser.add_argument('--speed', help='modify output audio speed, follow by number', dest='speed')

chat_parser = subparsers.add_parser('chat', help='chat using Turing Robot API')
chat_parser.add_argument('--key', help='Turing Robot API key', dest='api_key')
Expand All @@ -163,11 +179,19 @@ def start_chatting(key, location):
synthesize_parser.print_help()
print('ERROR: Missing argument --dst')
sys.exit(1)
tts.synthesize(args.text, args.src, args.dst)
audioType = 'wav'
if args.type:
audioType = args.type
compressAudio = True
if args.compressed == 'false':
compressAudio = False
audioSpeed = 0
if args.speed != '0':
audioSpeed = args.speed
tts.synthesize(args.text, args.src, args.dst, audioType, compressAudio, audioSpeed)

elif args.cmd == 'chat':
start_chatting(args.api_key, args.location)
else:
while True:
tts.speak(input('输入中文:'))


Loading