Skip to content

Commit

Permalink
feat: add ctc segmentation tool to everyvoice
Browse files: browse the repository at this point in the history
  • Loading branch information
roedoejet committed Nov 11, 2023
1 parent ef0fc9a commit e828331
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "everyvoice/model/vocoder/HiFiGAN_iSTFT_lightning"]
path = everyvoice/model/vocoder/HiFiGAN_iSTFT_lightning
url = ../HiFiGAN_iSTFT_lightning.git
[submodule "everyvoice/model/aligner/wav2vec2aligner"]
path = everyvoice/model/aligner/wav2vec2aligner
url = ../wav2vec2aligner.git
7 changes: 5 additions & 2 deletions everyvoice/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
sys.path.append(
os.path.join(parent_folder_path, "model", "aligner", "DeepForcedAligner")
)
+ sys.path.append(os.path.join(parent_folder_path, "model", "aligner", "wav2vec2aligner"))
sys.path.append(
- os.path.join(parent_folder_path, "model", "aligner", "FastSpeech2_lightning")
+ os.path.join(
+ parent_folder_path, "model", "feature_prediction", "FastSpeech2_lightning"
+ )
)
sys.path.append(
- os.path.join(parent_folder_path, "model", "aligner", "HiFiGAN_iSTFT_lightning")
+ os.path.join(parent_folder_path, "model", "vocoder", "HiFiGAN_iSTFT_lightning")
)
20 changes: 20 additions & 0 deletions everyvoice/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from everyvoice.config.preprocessing_config import PreprocessingConfig
from everyvoice.config.text_config import TextConfig
from everyvoice.model.aligner.config import AlignerConfig
from everyvoice.model.aligner.wav2vec2aligner.aligner.cli import (
align_single as ctc_segment,
)
from everyvoice.model.e2e.config import EveryVoiceConfig
from everyvoice.model.feature_prediction.config import FeaturePredictionConfig
from everyvoice.model.feature_prediction.FastSpeech2_lightning.fs2.cli import (
Expand Down Expand Up @@ -56,6 +59,11 @@ def list_commands(self, ctx):
To run the new dataset wizard please use the following command: everyvoice new-dataset
## Segment long files in your dataset
If you have long audio files that contain more than one utterance,
you can use the segmentation tool by running everyvoice segment [OPTIONS]
## Preprocess
Once you have a configuration, preprocess your data by running everyvoice preprocess [OPTIONS]
Expand All @@ -81,6 +89,18 @@ class ModelTypes(str, Enum):
spec_to_wav = "spec-to-wav"


# Register the imported `ctc_segment` callable (`align_single` from the
# wav2vec2aligner submodule's CLI, aliased in the imports above) as the
# `segment` sub-command. The object returned by `app.command(...)` is called
# directly on `ctc_segment` instead of being applied with `@` syntax, because
# the function is defined in another module rather than in this file.
app.command(
short_help="Segment a long audio file",
name="segment",
help="""
# Segmentation help
This command will segment a long audio file into multiple utterances which is required for training a TTS system.
This command should work on most languages and you should run it before running the new dataset or preprocessing steps.
""",
)(ctc_segment)


@app.command(
short_help="This command will help you create all the configuration necessary for using a new dataset.",
help="""
Expand Down
1 change: 1 addition & 0 deletions everyvoice/model/aligner/wav2vec2aligner
Submodule wav2vec2aligner added at e9d968
4 changes: 2 additions & 2 deletions requirements.torch.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# these requirements have to be installed ahead of time in your environment and from a different URL:
# CUDA_TAG=cu117 pip install -r requirements.torch.txt --find-links https://download.pytorch.org/whl/torch_stable.html
- torch==2.0.1+${CUDA_TAG}
- torchaudio==2.0.2+${CUDA_TAG}
+ torch==2.1.0+${CUDA_TAG}
+ torchaudio==2.1.0+${CUDA_TAG}

0 comments on commit e828331

Please sign in to comment.