Skip to content

Commit

Permalink
Update TextToSpeech pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmezzetti committed Jan 17, 2025
1 parent 77649d5 commit dca6938
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/python/txtai/pipeline/audio/texttospeech.py
Original file line number Diff line number Diff line change
@@ -278,14 +278,14 @@ def __call__(self, text, speaker):

# Split into batches and process
results = []
- for x in self.chunk(tokens, self.maxtokens, self.tokenizer.punctuation()):
+ for i, x in enumerate(self.chunk(tokens, self.maxtokens, self.tokenizer.punctuation())):
# Format input parameters
params = {self.input: x}
params = {**params, **{"sids": np.array([speaker])}} if "sids" in self.params else params

# Run text through TTS model and save waveform
output = self.model.run(None, params)
- results.append(Signal.trim(output[0], rate, trailing=False))
+ results.append(Signal.trim(output[0], rate, trailing=False) if i > 0 else output[0])

# Concatenate results and return
return (np.concatenate(results), rate)
@@ -423,10 +423,10 @@ def __call__(self, text, speaker):

# Split into batches and process
results = []
- for x in self.chunk(inputs["input_ids"][0], self.maxtokens, self.punctids):
+ for i, x in enumerate(self.chunk(inputs["input_ids"][0], self.maxtokens, self.punctids)):
# Run text through TTS model and save waveform
chunk = self.process(np.array([x], dtype=np.int64), speaker)
- results.append(Signal.trim(chunk, rate, trailing=False))
+ results.append(Signal.trim(chunk, rate, trailing=False) if i > 0 else chunk)

# Concatenate results and return
return (np.concatenate(results), rate)
Expand Down

0 comments on commit dca6938

Please sign in to comment.