forked from amitybell/piper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtts.go
146 lines (126 loc) · 2.93 KB
/
tts.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
package piper
import (
"bytes"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"github.com/adrg/xdg"
asset "github.com/amitybell/piper-asset"
)
// TTS is a text-to-speech engine bound to one installed piper voice.
// Values are created by New; Synthesize uses the unexported paths to run the
// piper executable.
type TTS struct {
	// ModelCard is the voice description returned when the voice was installed.
	ModelCard string
	// VoiceName is the name of the voice asset this engine was created for.
	VoiceName string

	// onnxFn is passed to piper as --model and jsonFn as --config.
	onnxFn, jsonFn string
	// piperExe is the executable that gets run; piperDir is used as its
	// working directory.
	piperExe, piperDir string
}
// VoiceOptions collects the tunable synthesis parameters for one Synthesize
// call. Synthesize starts from the defaults listed here and lets each Option
// override them:
//
//   - speed: default 1.0, forwarded to piper as --length_scale.
//   - noise: default 0.667, forwarded to piper as --noise_scale.
//   - pause: default 0.2, forwarded to piper as --sentence_silence.
//
// A value is only forwarded when it differs from its default.
type VoiceOptions struct {
	speed, noise, pause float32
}
// Option is a functional option: a function that adjusts the VoiceOptions
// used by a single TTS.Synthesize call. Options are applied in the order they
// are passed to Synthesize.
type Option func(*VoiceOptions)
// WithSpeed returns an Option that sets the speed parameter of a synthesis
// call. Synthesize forwards the value to piper as --length_scale whenever it
// is not 1.0.
//
// NOTE(review): piper's length scale presumably stretches phonemes, so larger
// values may mean slower speech - confirm against the piper documentation.
func WithSpeed(speed float32) Option {
	var apply Option = func(o *VoiceOptions) { o.speed = speed }
	return apply
}
// WithNoise returns an Option that sets the noise parameter of a synthesis
// call. Synthesize forwards the value to piper as --noise_scale whenever it
// is not 0.667.
func WithNoise(noise float32) Option {
	var apply Option = func(o *VoiceOptions) { o.noise = noise }
	return apply
}
// WithPause returns an Option that sets the pause parameter of a synthesis
// call. Synthesize forwards the value to piper as --sentence_silence whenever
// it is not 0.2.
func WithPause(pause float32) Option {
	var apply Option = func(o *VoiceOptions) { o.pause = pause }
	return apply
}
// Synthesize runs the piper executable on text and returns the WAV data it
// produced.
//
// The voice parameters start at speed 1.0, noise 0.667 and pause 0.2 and are
// then adjusted by opts in order; nil options are skipped. A parameter is
// only forwarded to piper (as --length_scale, --noise_scale or
// --sentence_silence) when it differs from that starting value.
//
// On every platform except Windows piper is told to write the audio to "-"
// and its stdout is captured in memory. On Windows the audio is written to a
// file inside a temporary directory, read back, and the directory is removed
// before Synthesize returns.
//
// Errors from running piper and from reading the output file are wrapped, so
// callers can inspect them with errors.Is and errors.As. The error for a
// failed run also contains the command line and whatever piper wrote to
// stderr.
func (t *TTS) Synthesize(text string, opts ...Option) (wav []byte, err error) {
	options := &VoiceOptions{
		speed: 1.0,
		noise: 0.667,
		pause: 0.2,
	}
	for _, opt := range opts {
		if opt == nil {
			// Tolerate nil options instead of panicking on the call below.
			continue
		}
		opt(options)
	}

	var (
		captured bytes.Buffer
		// stdout stays a nil interface on Windows, so nothing is attached
		// to the child's stdout and the WAV is taken from the file instead.
		stdout io.Writer
	)
	outputFn := "-"
	if runtime.GOOS != "windows" {
		stdout = &captured
	} else {
		tmpDir, err := os.MkdirTemp("", "ab-piper.")
		if err != nil {
			return nil, fmt.Errorf("TTS.Synthesize: Cannot create temp file: %w", err)
		}
		// Best-effort cleanup; runs when Synthesize returns, after the
		// output file has been read.
		defer os.RemoveAll(tmpDir)
		outputFn = filepath.Join(tmpDir, "tts.wav")
	}

	args := []string{
		"--model", t.onnxFn,
		"--config", t.jsonFn,
		"--output_file", outputFn,
	}
	if options.speed != 1.0 {
		args = append(args, "--length_scale", fmt.Sprintf("%f", options.speed))
	}
	if options.noise != 0.667 {
		args = append(args, "--noise_scale", fmt.Sprintf("%f", options.noise))
	}
	if options.pause != 0.2 {
		args = append(args, "--sentence_silence", fmt.Sprintf("%f", options.pause))
	}

	var stderr bytes.Buffer
	cmd := exec.Command(t.piperExe, args...)
	cmd.Dir = t.piperDir
	cmd.Stdin = strings.NewReader(text)
	cmd.Stdout = stdout
	cmd.Stderr = &stderr
	cmd.SysProcAttr = sysProcAttr
	if err := cmd.Run(); err != nil {
		// %w keeps the underlying error (e.g. *exec.ExitError) reachable.
		return nil, fmt.Errorf("TTS.Synthesize: %s: %w: %s", cmd, err, stderr.Bytes())
	}

	if stdout != nil {
		return captured.Bytes(), nil
	}

	wav, err = os.ReadFile(outputFn)
	if err != nil {
		return nil, fmt.Errorf("TTS.Synthesize: %w", err)
	}
	return wav, nil
}
// New installs the given voice and the piper binary under dataDir and returns
// a TTS configured to use them.
//
// When dataDir is empty, the location returned by xdg.DataFile("ab-piper") is
// used instead. The voice is installed into the "piper-voice-<voice.Name>"
// subdirectory of the data directory via installVoice, and the binary is
// installed via installPiper; the directory containing the returned
// executable becomes the working directory for later piper runs.
//
// Any failure is returned wrapped with a "piper.Install: ..." prefix.
func New(dataDir string, voice asset.Asset) (*TTS, error) {
	root := dataDir
	if root == "" {
		xdgDir, err := xdg.DataFile("ab-piper")
		if err != nil {
			return nil, fmt.Errorf("piper.Install: cannot create data dir: %w", err)
		}
		root = xdgDir
	}

	voiceDir := filepath.Join(root, "piper-voice-"+voice.Name)
	card, onnxPath, jsonPath, err := installVoice(voiceDir, voice.FS)
	if err != nil {
		return nil, fmt.Errorf("piper.Install: cannot install piper voice: %w", err)
	}

	exePath, err := installPiper(root)
	if err != nil {
		return nil, fmt.Errorf("piper.Install: cannot install piper binary: %w", err)
	}

	return &TTS{
		ModelCard: card,
		VoiceName: voice.Name,
		onnxFn:    onnxPath,
		jsonFn:    jsonPath,
		piperExe:  exePath,
		piperDir:  filepath.Dir(exePath),
	}, nil
}