Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into lstm-onnx
Browse files Browse the repository at this point in the history
  • Loading branch information
borisfom committed Feb 9, 2023
2 parents 34a65d4 + dfdd8f0 commit e140b7f
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 53 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ Key Features
* `Speech synthesis (TTS) <https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/intro.html#>`_
* Spectrogram generation: Tacotron2, GlowTTS, TalkNet, FastPitch, FastSpeech2, Mixer-TTS, Mixer-TTS-X
* Vocoders: WaveGlow, SqueezeWave, UniGlow, MelGAN, HiFiGAN, UnivNet
* End-to-end speech generation: FastPitch_HifiGan_E2E, FastSpeech2_HifiGan_E2E
* End-to-end speech generation: FastPitch_HifiGan_E2E, FastSpeech2_HifiGan_E2E, VITS
* `NGC collection of pre-trained TTS models. <https://ngc.nvidia.com/catalog/collections/nvidia:nemo_tts>`_
* `Tools <https://github.com/NVIDIA/NeMo/tree/stable/tools>`_
* `Text Processing (text normalization and inverse text normalization) <https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/text_normalization/intro.html>`_
Expand Down
6 changes: 6 additions & 0 deletions docs/source/tts/checkpoints.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,10 @@ Vocoders
.. csv-table::
:file: data/ngc_models_vocoder.csv
:align: left
:header-rows: 1

End2End models
^^^^^^^^^^^^^^
.. csv-table::
:file: data/ngc_models_e2e.csv
:align: left
:header-rows: 1
2 changes: 2 additions & 0 deletions docs/source/tts/data/ngc_models_e2e.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Locale,Model Name,Dataset,Sampling Rate,#Spk,Phoneme Unit,Model Class,Overview,Checkpoint
en-US,tts_en_lj_vits,LJSpeech,22050Hz,1,IPA,nemo.collections.tts.models.vits.VitsModel,`tts_en_lj_vits <https://ngc.nvidia.com/catalog/models/nvidia:nemo:tts_en_lj_vits>`_,``https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_lj_vits/versions/1.13.0/files/vits_ljspeech_fp16_full.nemo``
9 changes: 8 additions & 1 deletion nemo/collections/tts/models/vits.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,14 @@ def setup_test_data(self, cfg):
@classmethod
def list_available_models(cls) -> 'List[PretrainedModelInfo]':
list_of_models = []
# TODO: List available models??
model = PretrainedModelInfo(
pretrained_model_name="tts_en_lj_vits",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_lj_vits/versions/1.13.0/files/vits_ljspeech_fp16_full.nemo",
description="This model is trained on LJSpeech audio sampled at 22050Hz. This model has been tested on generating female English "
"voices with an American accent.",
class_=cls,
)
list_of_models.append(model)
return list_of_models

@typecheck(
Expand Down
5 changes: 3 additions & 2 deletions nemo/collections/tts/modules/fastpitch.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def forward(

# Predict energy
if self.energy_predictor is not None:
energy_pred = self.energy_predictor(enc_out, enc_mask).squeeze(-1)
energy_pred = self.energy_predictor(prosody_input, enc_mask).squeeze(-1)

if energy is not None:
# Average energy over characters
Expand Down Expand Up @@ -357,6 +357,8 @@ def infer(self, *, text, pitch=None, speaker=None, energy=None, pace=1.0, volume
)
pitch_predicted = self.pitch_predictor(prosody_input, enc_mask) + pitch
pitch_emb = self.pitch_emb(pitch_predicted.unsqueeze(1))
enc_out = enc_out + pitch_emb.transpose(1, 2)

if self.energy_predictor is not None:
if energy is not None:
assert energy.shape[-1] == text.shape[-1], f"energy.shape[-1]: {energy.shape[-1]} != len(text)"
Expand All @@ -365,7 +367,6 @@ def infer(self, *, text, pitch=None, speaker=None, energy=None, pace=1.0, volume
energy_pred = self.energy_predictor(prosody_input, enc_mask).squeeze(-1)
energy_emb = self.energy_emb(energy_pred.unsqueeze(1))
enc_out = enc_out + energy_emb.transpose(1, 2)
enc_out = enc_out + pitch_emb.transpose(1, 2)

# Expand to decoder time dimension
len_regulated, dec_lens = regulate_len(durs_predicted, enc_out, pace)
Expand Down
60 changes: 60 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[tool.isort]
profile = "black" # black-compatible
line_length = 119 # should match black parameters
ignore_whitespace = true # ignore whitespace for compatibility with the initial style
py_version = 38 # python 3.8 as a target version
known_first_party = ["nemo"] # FIRSTPARTY section
known_third_party = ["nemo_text_processing", "examples", "scripts"] # THIRDPARTY section
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
default_section = "THIRDPARTY"
extend_skip = ["setup.py", "docs/source/conf.py"]


[tool.pytest.ini_options]
# durations=0 will display all tests execution time, sorted in descending order starting from the slowest one.
# -vv will also display tests with duration = 0.00s
addopts = "--verbose --pyargs --durations=0 --strict-markers" # always add these arguments to pytest
testpaths = ["tests"]
# directories to ignore when discovering tests
norecursedirs = [
"nemo",
"nemo_text_processing",
"external",
"examples",
"docs",
"scripts",
"tools",
"tutorials",
"*.egg",
".*",
"_darcs",
"build",
"CVS",
"dist",
"venv",
"{arch}"
]
# markers to select tests, use `pytest --markers` to see all available markers, `pytest -m "<marker>"` to select tests
markers = [
"unit: marks unit test, i.e. testing a single, well isolated functionality (deselect with '-m \"not unit\"')",
"integration: marks test checking the elements when integrated into subsystems (deselect with '-m \"not integration\"')",
"system: marks test working at the highest integration level (deselect with '-m \"not system\"')",
"acceptance: marks test checking whether the developed product/model passes the user defined acceptance criteria (deselect with '-m \"not acceptance\"')",
"docs: mark tests related to documentation (deselect with '-m \"not docs\"')",
"skipduringci: marks tests that are skipped ci as they are addressed by Jenkins jobs but should be run to test user setups",
"pleasefixme: marks tests that are broken and need fixing",
]
22 changes: 20 additions & 2 deletions scripts/installers/install_torchaudio_latest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,40 @@ LATEST_RELEASE=$(git -c 'versionsort.suffix=-' \
# expected TORCHAUDIO_BUILD_VERSION=*.**.*
TORCHAUDIO_BUILD_VERSION=${LATEST_RELEASE:8:1}${PYTORCH_VERSION:1:5}

TORCH_MAJOR_VERSION=$(python3 -c "major_version = \"${PYTORCH_VERSION}\".split('.')[0]; print(major_version)")
TORCH_MINOR_VERSION=$(python3 -c "minor_version = \"${PYTORCH_VERSION}\".split('.')[1]; print(minor_version)")
TORCHAUDIO_MINOR_VERSION=$(python3 -c "minor_version = \"${LATEST_RELEASE}\".rsplit('.')[-1]; print(minor_version)")

if [[ $TORCH_MAJOR_VERSION -ne 1 ]]; then
echo "WARNING: Pytorch major version different from 1 not supported"
fi

echo "Latest torchaudio release: ${LATEST_RELEASE:8:4}"
echo "Pytorch version: ${PYTORCH_VERSION:0:6}"
echo "Torchaudio build version: ${TORCHAUDIO_BUILD_VERSION}"

if [[ "$TORCH_MINOR_VERSION" -lt "$TORCHAUDIO_MINOR_VERSION" ]]; then
# for old containers, we need to install matching torchaudio version
INSTALL_BRANCH="release/0.${TORCH_MINOR_VERSION}"
else
# for new containers use latest release
INSTALL_BRANCH=${LATEST_RELEASE}
fi

echo "Installing torchaudio from branch: ${INSTALL_BRANCH}"

# we need parameterized to run torchaudio tests
# suppose that we do not have parameterized installed yet
pip install parameterized

# Build torchaudio and run MFCC test
git clone --depth 1 --branch ${LATEST_RELEASE} https://github.com/pytorch/audio.git && \
git clone --depth 1 --branch ${INSTALL_BRANCH} https://github.com/pytorch/audio.git && \
cd audio && \
git submodule update --init --recursive && \
BUILD_SOX=1 BUILD_VERSION=${TORCHAUDIO_BUILD_VERSION} python setup.py install && \
cd .. && \
pytest -rs audio/test/torchaudio_unittest/transforms/torchscript_consistency_cpu_test.py -k 'test_MFCC' || \
(echo "ERROR: Failed to install torchaudio!"; exit 1);
{ echo "ERROR: Failed to install torchaudio!"; exit 1; };
# RNNT loss is built with CUDA, so checking it will suffice
# This test will be skipped if CUDA is not available (e.g. when building from docker)
pytest -rs audio/test/torchaudio_unittest/functional/torchscript_consistency_cuda_test.py -k 'test_rnnt_loss' || \
Expand Down
43 changes: 0 additions & 43 deletions setup.cfg

This file was deleted.

4 changes: 0 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,6 @@ def req_file(filename, folder="requirements"):

extras_require['slu'] = list(chain([extras_require['slu'], extras_require['asr']]))

tests_requirements = extras_require["test"]


###############################################################################
# Code style checkers #
Expand Down Expand Up @@ -257,8 +255,6 @@ def finalize_options(self):
],
packages=setuptools.find_packages(),
install_requires=install_requires,
setup_requires=['pytest-runner'],
tests_require=tests_requirements,
# List additional groups of dependencies here (e.g. development
# dependencies). You can install these using the following syntax,
# $ pip install -e ".[all]"
Expand Down

0 comments on commit e140b7f

Please sign in to comment.