junzew · LanHao0 · Aug 21, 2020 · Aug 22, 2020 · Aug 23, 2020 · Aug 23, 2020
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 .DS_Store
-.wav
+.wav
+node_modules
+venv
diff --git a/README.md b/README.md
@@ -4,6 +4,8 @@ Chinese Text-to-Speech(TTS)
 
 汉字 => ["han4", "zi4"] => .wav audio
 
+(environment: python 3)
+
 *Read this page in [简体中文](https://github.com/junzew/HanTTS/blob/master/README.zh.md)*
 ## Libraries Used
 
@@ -28,7 +30,6 @@ cd HanTTS
 pip install --user -r requires.txt
 ```
 
-Download [`syllables.zip`](https://sourceforge.net/projects/hantts/files/?source=navbar) from SourceForge, and decompress under the directory `HanTTS`.
 
 * Either run locally: `python main.py` 
 * Or through web interface:
@@ -39,6 +40,26 @@ Download [`syllables.zip`](https://sourceforge.net/projects/hantts/files/?source
 	```
 	Navigate to `localhost:3000` in a browser
 
+## Advanced usage
+Set the audio parameters yourself through the request URL:
+`http://127.0.0.1:3000/pythonAlias/audioType/decodeUTF8/compressed/speed/text`
+
+For example:
+```
+http://127.0.0.1:3000/python3/wav/false/true/1/测试
+```
+
+|  params   | accept  |note|
+|  ----  | ----  |----|
+| pythonAlias  | python, python3 |use `python3` if that is the command that starts Python 3 on your machine|
+| audioType  | wav, mp3 (others not tested) | choose the output file type|
+|decodeUTF8|true, false|whether the text needs to be UTF-8 decoded|
+|compressed|true, false| output a smaller audio file|
+|speed|numbers, like -0.5, 1, 3 |floats and negative values are accepted; the playback rate is multiplied by 2^speed, so use 0 to leave the speed unchanged|
+|text|Chinese text|the content to convert to speech|
+
+
+
 ## Use your own voice
 - Record [five tones](https://en.wikipedia.org/wiki/Pinyin#Tones) of each [pinyin](https://en.wikipedia.org/wiki/Pinyin_table) listed in `mapping.json`
 - Group them by the first letter (a,b,c,d, etc.), and save under folder `./recording` as `{letter}.wav`

diff --git a/README.zh.md b/README.zh.md
@@ -4,6 +4,7 @@
 
 汉字 => 拼音 ["han4", "zi4"] => .wav音频
 
+(environment: python 3)
 ## 使用的库
 
 #### 汉字转拼音
@@ -27,15 +28,31 @@ cd HanTTS
 pip install --user -r requires.txt
 ```
 
-从SourceForge下载语音库[`syllables.zip`](https://sourceforge.net/projects/hantts/files/?source=navbar)，并解压到`HanTTS`目录下
-
 - 本地执行 `python main.py`
 - 或 Web
 	- `cd` 到 `server` 文件夹下
 	- `npm install`
 	- `node app.js`
 	- 浏览器里打开`localhost:3000` 
 
+## 进阶使用
+自己设置音频参数
+`http://127.0.0.1:3000/pythonAlias/audioType/decodeUTF8/compressed/speed/text`
+
+例如：
+```
+http://127.0.0.1:3000/python3/wav/false/true/1/测试
+```
+
+|  params   | accept  |note|
+|  ----  | ----  |----|
+| pythonAlias  | python, python3 |如果你的设备python别名为python3，请填写python3|
+| audioType  | wav, mp3 (其他没有测试) | 音频输出格式|
+|decodeUTF8|true, false|是否文字需要解码utf-8|
+|compressed|true, false|是否输出一个压缩文件|
+|speed|数字, 比如 -0.5, 1, 3 |( 可以使用float或者负数 ), 如果不想改变速度，填写 0|
+|text|中文|TTS的内容|
+
 ## 录制新的语音库
 - 按阴平、阳平、上声、去声、轻声的顺序录下 mapping.json 里每一个音节的五个声调
 - 按开头字母(letter)分组, 将文件存在 ./recording/{letter}.wav下

diff --git a/app.js b/app.js
@@ -2,40 +2,101 @@ const express = require('express')
 var bodyParser = require('body-parser');
 const spawn = require('child_process').spawn;
 var fs = require('fs');
+const path = require('path');
+const utf8 = require('utf8');
 
 const app = express()
 app.use(express.static('public'))
 app.use('/audio', express.static('audio'))
 app.use(bodyParser.json()); // for parsing application/json
 
 app.listen(process.env.PORT || 3000, function () {
-	console.log('app listening on port 3000')
-	var dir = './audio';
-	if (!fs.existsSync(dir)){
-	    fs.mkdirSync(dir);
-	    console.log("created directory ./audio")
-	}
+    console.log(`app listening on port ${process.env.PORT || 3000}`) // PORT may override the default 3000
+    const dir = './audio'; // generated audio is written here and served under /audio
+    if (!fs.existsSync(dir)) {
+        fs.mkdirSync(dir);
+        console.log("created directory ./audio")
+    }
 });
 
-app.post("/", function(req, res) {
-	var text = req.body.text;
-	console.log(text)
-	args = ["./main.py", 'synthesize', '--text', text, '--src', "./syllables/", '--dst', "./audio/"]
-	var process = spawn('python', args);
-	var output = "";
-    process.stdout.on('data', function(data){ output += data });
-    process.stderr.on('data', function(data){ console.error(`stderr: ${data}`); });
-	process.on("close", function(code) {
-		if (code !== 0) {
-			return res.send(`child process exited with code ${code}`)
-		}
-		console.log(output)
-		console.log("sending response")
-
-		res.send('<audio src="./audio/generated.wav" controls="true"></audio>')
-	});
+// POST /: synthesize req.body.text into ./audio/generated.wav and reply with an <audio> tag.
+app.post("/", function (req, res) {
+    const text = req.body.text;
+    console.log(text)
+    // Declared locally; a bare assignment here would create a global shared by all requests.
+    const args = ["./main.py", 'synthesize', '--text', text, '--src', "./syllables/", '--dst', "./audio/", '--type', 'wav']
+    const pythonAlias = req.body.pythonChoose === 'python3' ? 'python3' : 'python';
+    const child = spawn(pythonAlias, args); // not named `process`, which would shadow the Node global
+    let output = "";
+    child.stdout.on('data', function (data) {
+        output += data
+    });
+    child.stderr.on('data', function (data) {
+        console.error(`stderr: ${data}`);
+    });
+    // Without an 'error' listener, a missing interpreter would throw and take the server down.
+    child.on("error", function (err) {
+        console.error(`failed to start ${pythonAlias}: ${err.message}`);
+        if (!res.headersSent) res.status(500).send(`failed to start ${pythonAlias}`)
+    });
+    child.on("close", function (code) {
+        if (res.headersSent) return; // the 'error' handler already replied
+        if (code !== 0) {
+            return res.send(`child process exited with code ${code}`)
+        }
+        console.log(output)
+        console.log("sending response")
+        res.send('<audio src="./audio/generated.wav" controls="true"></audio>')
+    });
+});
+
+app.get("/file", (req, res) => { // offer the generated WAV as a download
+    res.download("./audio/generated.wav");
 });
 
-app.get("/file", function(req, res) {
-	res.download("./audio/generated.wav")
+// GET /:pythonVersion/:type/:decodeUTF8/:compressed/:speed/:text: synthesize :text and send ./audio/generated.<type>.
+app.get("/:pythonVersion/:type/:decodeUTF8/:compressed/:speed/:text", function (req, res) {
+    const audioType = req.params.type;
+    const speed = req.params.speed;
+    // audioType becomes part of a file path below, so accept only a plain
+    // extension; a decoded "%2F" would otherwise allow "../" path traversal.
+    if (!/^[A-Za-z0-9]+$/.test(audioType)) {
+        return res.status(400).send('invalid audio type')
+    }
+    // main.py passes speed to float(); refuse non-numeric values up front.
+    if (!/^[+-]?(\d+\.?\d*|\.\d+)$/.test(speed)) {
+        return res.status(400).send('invalid speed')
+    }
+    let text = req.params.text;
+    if (req.params.decodeUTF8 === 'true') {
+        text = utf8.decode(text);
+    }
+    const args = ["./main.py", 'synthesize',
+        '--text', text,
+        '--src', "./syllables/", '--dst', "./audio/",
+        '--type', audioType, '--compressed', req.params.compressed,
+        '--speed', speed]
+    const pythonAlias = req.params.pythonVersion === "python3" ? 'python3' : 'python';
+    const child = spawn(pythonAlias, args); // not named `process`, which would shadow the Node global
+    let output = "";
+    child.stdout.on('data', function (data) {
+        output += data
+    });
+    child.stderr.on('data', function (data) {
+        console.error(`stderr: ${data}`);
+    });
+    // Without an 'error' listener, a missing interpreter would throw and take the server down.
+    child.on("error", function (err) {
+        console.error(`failed to start ${pythonAlias}: ${err.message}`);
+        if (!res.headersSent) res.status(500).send(`failed to start ${pythonAlias}`)
+    });
+    child.on("close", function (code) {
+        if (res.headersSent) return; // the 'error' handler already replied
+        if (code !== 0) {
+            return res.send(`child process exited with code ${code}`)
+        }
+        console.log(output)
+        console.log("sending response")
+        res.sendFile(path.join(__dirname, "./audio/generated." + audioType))
+    });
 });
diff --git a/main.py b/main.py
@@ -19,13 +19,13 @@
 import argparse
 
 # for demo only, please replace with your own API key
-Turing_API_key = "64c88489ad7f432591d702ec1334dedc" 
+Turing_API_key = "64c88489ad7f432591d702ec1334dedc"
 Turing_API_address = "http://www.tuling123.com/openapi/api"
 
-class TextToSpeech:
 
+class TextToSpeech:
     CHUNK = 1024
-    punctuation = ['，', '。','？','！','“','”','；','：','（',"）",":",";",",",".","?","!","\"","\'","(",")"]
+    punctuation = ['，', '。', '？', '！', '“', '”', '；', '：', '（', "）", ":", ";", ",", ".", "?", "!", "\"", "\'", "(", ")"]
 
     def __init__(self):
         pass
@@ -34,7 +34,7 @@ def speak(self, text):
         syllables = lazy_pinyin(text, style=pypinyin.TONE3)
         print(syllables)
         delay = 0
-        
+
         def preprocess(syllables):
             temp = []
             for syllable in syllables:
@@ -51,26 +51,26 @@ def preprocess(syllables):
 
         syllables = preprocess(syllables)
         for syllable in syllables:
-            path = "syllables/"+syllable+".wav"
+            path = "syllables/" + syllable + ".wav"
             _thread.start_new_thread(TextToSpeech._play_audio, (path, delay))
             delay += 0.355
 
-    def synthesize(self, text, src, dst):
+    def synthesize(self, text, src, dst, audio_type, compressed, speed):
         """
         Synthesize .wav from text
         src is the folder that contains all syllables .wav files
         dst is the destination folder to save the synthesized file
         """
         print("Synthesizing ...")
         delay = 0
-        increment = 355 # milliseconds
-        pause = 500 # pause for punctuation
+        increment = 355  # milliseconds
+        pause = 500  # pause for punctuation
         syllables = lazy_pinyin(text, style=pypinyin.TONE3)
 
         # initialize to be complete silence, each character takes up ~500ms
-        result = AudioSegment.silent(duration=500*len(text))
+        result = AudioSegment.silent(duration=500 * len(text))
         for syllable in syllables:
-            path = src+syllable+".wav"
+            path = src + syllable + ".wav"
             sound_file = Path(path)
             # insert 500 ms silence for punctuation marks
             if syllable in TextToSpeech.punctuation:
@@ -88,8 +88,19 @@ def synthesize(self, text, src, dst):
         directory = dst
         if not os.path.exists(directory):
             os.makedirs(directory)
+        if speed != 0:
+            print("speed:" + str(2 ** float(speed)))
+            new_sample_rate = int(result.frame_rate * (2.0 ** float(speed)))
+            result = result._spawn(result.raw_data, overrides={'frame_rate': new_sample_rate})
+
+        if compressed is True:
+            print("compressed")
+            result.export(directory + "generated." + audio_type, format=audio_type,
+                          parameters=["-ac", "1", "-ar", "8000"])
+        else:
+            print("not compressed")
+            result.export(directory + "generated." + audio_type, format=audio_type)
 
-        result.export(directory+"generated.wav", format="wav")
         print("Exported.")
 
     def _play_audio(path, delay):
@@ -101,13 +112,13 @@ def _play_audio(path, delay):
                             channels=wf.getnchannels(),
                             rate=wf.getframerate(),
                             output=True)
-            
+
             data = wf.readframes(TextToSpeech.CHUNK)
-            
+
             while data:
                 stream.write(data)
                 data = wf.readframes(TextToSpeech.CHUNK)
-        
+
             stream.stop_stream()
             stream.close()
 
@@ -116,34 +127,39 @@ def _play_audio(path, delay):
         except:
             pass
 
+
 def start_chatting(key, location):
     print("你好!")
     key = Turing_API_key if key is None else key
     location = "北京市中关村" if location is None else location
     while True:
         sentence = input('输入中文：')
         r = requests.post(
-            Turing_API_address, 
-            json = {
-            "key": key,
-            "info": sentence, 
-            "loc": location, 
-            "userid":"1"
+            Turing_API_address,
+            json={
+                "key": key,
+                "info": sentence,
+                "loc": location,
+                "userid": "1"
             })
         response = r.json()["text"]
         print(response)
         tts.speak(response)
 
+
 if __name__ == '__main__':
     tts = TextToSpeech()
-    
+
     parser = argparse.ArgumentParser(description="HanTTS: Chinese Text-to-Speech program")
     subparsers = parser.add_subparsers(title="subcommands", help='optional subcommands', dest='cmd')
-    
+
     synthesize_parser = subparsers.add_parser('synthesize', help='synthesize audio from text')
     synthesize_parser.add_argument('--text', help='the text to convert to speech', dest='text')
     synthesize_parser.add_argument('--src', help='source directory of audio library', dest='src')
     synthesize_parser.add_argument('--dst', help='destination directory for generated .wav file', dest='dst')
+    synthesize_parser.add_argument('--type', help='choose the type of generated file, like wav or mp3', dest='type')
+    synthesize_parser.add_argument('--compressed', help='output compressed audio file', dest='compressed')
+    synthesize_parser.add_argument('--speed', help='modify output audio speed, follow by number', dest='speed')
 
     chat_parser = subparsers.add_parser('chat', help='chat using Turing Robot API')
     chat_parser.add_argument('--key', help='Turing Robot API key', dest='api_key')
@@ -163,11 +179,19 @@ def start_chatting(key, location):
             synthesize_parser.print_help()
             print('ERROR: Missing argument --dst')
             sys.exit(1)
-        tts.synthesize(args.text, args.src, args.dst)
+        # Defaults apply when an option is omitted, e.g. the web POST route
+        # passes neither --compressed nor --speed.
+        audioType = args.type if args.type else 'wav'
+        # Compression stays enabled unless '--compressed false' is given.
+        compressAudio = args.compressed != 'false'
+        # A missing --speed arrives as None; treat it as 0 (unchanged) instead of
+        # handing None to synthesize(), where float(None) raises TypeError.
+        # Converting here also makes '0.0' compare equal to 0 in synthesize().
+        audioSpeed = float(args.speed) if args.speed is not None else 0
+        tts.synthesize(args.text, args.src, args.dst, audioType, compressAudio, audioSpeed)
+
     elif args.cmd == 'chat':
         start_chatting(args.api_key, args.location)
     else:
         while True:
             tts.speak(input('输入中文：'))
-
-