From c428acf478bc2e0ea18da69e02554e095f8654b5 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 31 Mar 2020 17:55:48 +0200 Subject: [PATCH] Automatically install ds_ctcdecoder in setup.py --- doc/TRAINING.rst | 11 +- setup.py | 101 ++++++++++++++---- taskcluster/tc-all-utils.sh | 22 ---- taskcluster/tc-single-shot-inference.sh | 3 - taskcluster/tc-train-tests.sh | 7 -- taskcluster/tc-transfer-tests.sh | 9 +- .../deepspeech_training/util/taskcluster.py | 28 ----- 7 files changed, 86 insertions(+), 95 deletions(-) diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index 99444df94d..2ef1512ee1 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -46,7 +46,8 @@ Install the required dependencies using ``pip3``\ : .. code-block:: bash cd DeepSpeech - pip3 install -e . + pip3 install --upgrade pip wheel setuptools + pip3 install --upgrade --force-reinstall -e . The ``webrtcvad`` Python package might require you to ensure you have proper tooling to build Python modules: @@ -54,14 +55,6 @@ The ``webrtcvad`` Python package might require you to ensure you have proper too sudo apt-get install python3-dev -You'll also need to install the ``ds_ctcdecoder`` Python package. ``ds_ctcdecoder`` is required for decoding the outputs of the ``deepspeech`` acoustic model into text. You can use ``util/taskcluster.py`` with the ``--decoder`` flag to get a URL to a binary of the decoder package appropriate for your platform and Python version: - -.. code-block:: bash - - pip3 install $(python3 util/taskcluster.py --decoder) - -This command will download and install the ``ds_ctcdecoder`` package. You can override the platform with ``--arch`` if you want the package for ARM7 (\ ``--arch arm``\ ) or ARM64 (\ ``--arch arm64``\ ). If you prefer building the ``ds_ctcdecoder`` package from source, see the :github:`native_client README file <native_client/README.rst>`.
- Recommendations ^^^^^^^^^^^^^^^ diff --git a/setup.py b/setup.py index d50dc60181..bdd5682d11 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,95 @@ +import os +import platform +import sys from pathlib import Path +from pkg_resources import parse_version from setuptools import find_packages, setup +def get_decoder_pkg_url(version, artifacts_root=None): + is_arm = 'arm' in platform.machine() + is_mac = 'darwin' in sys.platform + is_64bit = sys.maxsize > (2**31 - 1) + + if is_arm: + tc_arch = 'arm64-ctc' if is_64bit else 'arm-ctc' + elif is_mac: + tc_arch = 'osx-ctc' + else: + tc_arch = 'cpu-ctc' + + ds_version = parse_version(version) + branch = "v{}".format(version) + + plat = platform.system().lower() + arch = platform.machine() + + if plat == 'linux' and arch == 'x86_64': + plat = 'manylinux1' + + if plat == 'darwin': + plat = 'macosx_10_10' + + is_ucs2 = sys.maxunicode < 0x10ffff + m_or_mu = 'mu' if is_ucs2 else 'm' + + pyver = ''.join(str(i) for i in sys.version_info[0:2]) + + if not artifacts_root: + artifacts_root = 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.{branch_name}.{tc_arch_string}/artifacts/public'.format( + branch_name=branch, + tc_arch_string=tc_arch) + + return 'ds_ctcdecoder @ {artifacts_root}/ds_ctcdecoder-{ds_version}-cp{pyver}-cp{pyver}{m_or_mu}-{platform}_{arch}.whl'.format( + artifacts_root=artifacts_root, + ds_version=ds_version, + pyver=pyver, + m_or_mu=m_or_mu, + platform=plat, + arch=arch, + ) + + def main(): version_file = Path(__file__).parent / 'VERSION' with open(str(version_file)) as fin: version = fin.read().strip() + decoder_pkg_url = get_decoder_pkg_url(version) + + install_requires_base = [ + 'tensorflow == 1.15.2', + 'numpy == 1.18.1', + 'progressbar2', + 'six', + 'pyxdg', + 'attrdict', + 'absl-py', + 'semver', + 'opuslib == 2.0.0', + 'optuna', + 'sox', + 'bs4', + 'pandas', + 'requests', + 'librosa', + 'soundfile', + ] + + # Due to pip craziness environment 
variables are the only consistent way to + # get options into this script when doing `pip install`. + tc_decoder_artifacts_root = os.environ.get('DECODER_ARTIFACTS_ROOT', '') + if tc_decoder_artifacts_root: + # We're running inside the TaskCluster environment, override the decoder + # package URL with the one we just built. + decoder_pkg_url = get_decoder_pkg_url(version, tc_decoder_artifacts_root) + install_requires = install_requires_base + [decoder_pkg_url] + elif os.environ.get('DS_NODECODER', ''): + install_requires = install_requires_base + else: + install_requires = install_requires_base + [decoder_pkg_url] + setup( name='deepspeech_training', version=version, @@ -28,24 +110,7 @@ def main(): package_dir={'': 'training'}, packages=find_packages(where='training'), python_requires='>=3.5, <4', - install_requires=[ - 'tensorflow == 1.15.2', - 'numpy == 1.18.1', - 'progressbar2', - 'six', - 'pyxdg', - 'attrdict', - 'absl-py', - 'semver', - 'opuslib == 2.0.0', - 'optuna', - 'sox', - 'bs4', - 'pandas', - 'requests', - 'librosa', - 'soundfile', - ], + install_requires=install_requires, # If there are data files included in your packages that need to be # installed, specify them here. package_data={ diff --git a/taskcluster/tc-all-utils.sh b/taskcluster/tc-all-utils.sh index 6863c45194..1ef5b32599 100755 --- a/taskcluster/tc-all-utils.sh +++ b/taskcluster/tc-all-utils.sh @@ -122,25 +122,3 @@ verify_bazel_rebuild() exit 1 fi; } - -# Should be called from context where Python virtualenv is set -verify_ctcdecoder_url() -{ - default_url=$(python util/taskcluster.py --decoder) - echo "${default_url}" | grep -F "deepspeech.native_client.v${DS_VERSION}" - rc_default_url=$? - - tag_url=$(python util/taskcluster.py --decoder --branch 'v1.2.3') - echo "${tag_url}" | grep -F "deepspeech.native_client.v1.2.3" - rc_tag_url=$? 
- - master_url=$(python util/taskcluster.py --decoder --branch 'master') - echo "${master_url}" | grep -F "deepspeech.native_client.master" - rc_master_url=$? - - if [ ${rc_default_url} -eq 0 -a ${rc_tag_url} -eq 0 -a ${rc_master_url} -eq 0 ]; then - return 0 - else - return 1 - fi; -} diff --git a/taskcluster/tc-single-shot-inference.sh b/taskcluster/tc-single-shot-inference.sh index e058cecc39..13a8605b78 100755 --- a/taskcluster/tc-single-shot-inference.sh +++ b/taskcluster/tc-single-shot-inference.sh @@ -22,9 +22,6 @@ pushd ${HOME}/DeepSpeech/ds popd set +o pipefail -decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}") -LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: --upgrade ${decoder_pkg_url} | cat - pushd ${HOME}/DeepSpeech/ds/ time ./bin/run-tc-ldc93s1_singleshotinference.sh popd diff --git a/taskcluster/tc-train-tests.sh b/taskcluster/tc-train-tests.sh index e5220df51a..c8708f50e8 100644 --- a/taskcluster/tc-train-tests.sh +++ b/taskcluster/tc-train-tests.sh @@ -21,13 +21,6 @@ pushd ${HOME}/DeepSpeech/ds popd set +o pipefail -pushd ${HOME}/DeepSpeech/ds/ - verify_ctcdecoder_url -popd - -decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}") -LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat - # Prepare correct arguments for training case "${bitrate}" in 8k) diff --git a/taskcluster/tc-transfer-tests.sh b/taskcluster/tc-transfer-tests.sh index 382d4c91b9..3f9894cef4 100644 --- a/taskcluster/tc-transfer-tests.sh +++ b/taskcluster/tc-transfer-tests.sh @@ -15,17 +15,10 @@ virtualenv_activate "${pyalias}" "deepspeech" set -o pipefail pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat pushd ${HOME}/DeepSpeech/ds - pip install --upgrade . | cat + DS_NODECODER=1 pip install --upgrade . 
| cat popd set +o pipefail -pushd ${HOME}/DeepSpeech/ds/ - verify_ctcdecoder_url -popd - -decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}") -LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat - pushd ${HOME}/DeepSpeech/ds/ time ./bin/run-tc-transfer.sh popd diff --git a/training/deepspeech_training/util/taskcluster.py b/training/deepspeech_training/util/taskcluster.py index 0fbf6fae28..d0053c7dce 100644 --- a/training/deepspeech_training/util/taskcluster.py +++ b/training/deepspeech_training/util/taskcluster.py @@ -90,8 +90,6 @@ def main(): help='Name of the TaskCluster scheme to use.') parser.add_argument('--branch', required=False, help='Branch name to use. Defaulting to current content of VERSION file.') - parser.add_argument('--decoder', action='store_true', - help='Get URL to ds_ctcdecoder Python package.') args = parser.parse_args() @@ -119,32 +117,6 @@ def main(): else: ds_version = parse_version(args.branch) - if args.decoder: - plat = platform.system().lower() - arch = platform.machine() - - if plat == 'linux' and arch == 'x86_64': - plat = 'manylinux1' - - if plat == 'darwin': - plat = 'macosx_10_10' - - m_or_mu = 'mu' if is_ucs2 else 'm' - pyver = ''.join(map(str, sys.version_info[0:2])) - - artifact = "ds_ctcdecoder-{ds_version}-cp{pyver}-cp{pyver}{m_or_mu}-{platform}_{arch}.whl".format( - ds_version=ds_version, - pyver=pyver, - m_or_mu=m_or_mu, - platform=plat, - arch=arch - ) - - ctc_arch = args.arch + '-ctc' - - print(get_tc_url(ctc_arch, artifact, args.branch)) - sys.exit(0) - if args.source is not None: if args.source in DEFAULT_SCHEMES: global TASKCLUSTER_SCHEME