Skip to content

Commit

Permalink
Automatically install ds_ctcdecoder in setup.py
Browse files Browse the repository at this point in the history
  • Loading branch information
reuben committed Mar 31, 2020
1 parent 83d22e5 commit c428acf
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 95 deletions.
11 changes: 2 additions & 9 deletions doc/TRAINING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,15 @@ Install the required dependencies using ``pip3``\ :
.. code-block:: bash
cd DeepSpeech
pip3 install -e .
pip3 install --upgrade pip wheel setuptools
pip3 install --upgrade --force-reinstall -e .
The ``webrtcvad`` Python package might require you to ensure you have proper tooling to build Python modules:

.. code-block:: bash
sudo apt-get install python3-dev
You'll also need to install the ``ds_ctcdecoder`` Python package. ``ds_ctcdecoder`` is required for decoding the outputs of the ``deepspeech`` acoustic model into text. You can use ``util/taskcluster.py`` with the ``--decoder`` flag to get a URL to a binary of the decoder package appropriate for your platform and Python version:

.. code-block:: bash
pip3 install $(python3 util/taskcluster.py --decoder)
This command will download and install the ``ds_ctcdecoder`` package. You can override the platform with ``--arch`` if you want the package for ARM7 (\ ``--arch arm``\ ) or ARM64 (\ ``--arch arm64``\ ). If you prefer building the ``ds_ctcdecoder`` package from source, see the :github:`native_client README file <native_client/README.rst>`.

Recommendations
^^^^^^^^^^^^^^^

Expand Down
101 changes: 83 additions & 18 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,95 @@
import os
import platform
import sys
from pathlib import Path

from pkg_resources import parse_version
from setuptools import find_packages, setup


def get_decoder_pkg_url(version, artifacts_root=None):
    """Build the direct-reference requirement string for ``ds_ctcdecoder``.

    The wheel file name is derived from the running interpreter and the host
    platform, so the result describes the wheel for the machine executing
    this script.

    Args:
        version: DeepSpeech version string. It is used verbatim to form the
            TaskCluster index branch name (``v<version>``) and, after being
            normalised by ``parse_version``, inside the wheel file name.
        artifacts_root: Optional base URL under which the wheel is published.
            When falsy, the community TaskCluster index URL for this version
            and architecture is used instead.

    Returns:
        A string of the form ``'ds_ctcdecoder @ <wheel URL>'``.
    """
    machine = platform.machine()

    # 64-bit ARM Linux reports 'aarch64', which does not contain 'arm'; without
    # the second check such hosts would be routed to the x86 'cpu-ctc' index.
    is_arm = 'arm' in machine or 'aarch64' in machine
    is_mac = 'darwin' in sys.platform
    is_64bit = sys.maxsize > (2**31 - 1)

    if is_arm:
        tc_arch = 'arm64-ctc' if is_64bit else 'arm-ctc'
    elif is_mac:
        tc_arch = 'osx-ctc'
    else:
        tc_arch = 'cpu-ctc'

    ds_version = parse_version(version)
    branch = "v{}".format(version)

    plat = platform.system().lower()
    arch = machine

    if plat == 'linux' and arch == 'x86_64':
        plat = 'manylinux1'

    if plat == 'darwin':
        plat = 'macosx_10_10'

    if sys.version_info >= (3, 8):
        # CPython 3.8 removed the 'm' (pymalloc) ABI flag, so wheels are
        # tagged e.g. 'cp38-cp38' rather than 'cp38-cp38m'.
        m_or_mu = ''
    else:
        is_ucs2 = sys.maxunicode < 0x10ffff
        m_or_mu = 'mu' if is_ucs2 else 'm'

    pyver = ''.join(str(i) for i in sys.version_info[0:2])

    if not artifacts_root:
        artifacts_root = 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.{branch_name}.{tc_arch_string}/artifacts/public'.format(
            branch_name=branch,
            tc_arch_string=tc_arch)

    return 'ds_ctcdecoder @ {artifacts_root}/ds_ctcdecoder-{ds_version}-cp{pyver}-cp{pyver}{m_or_mu}-{platform}_{arch}.whl'.format(
        artifacts_root=artifacts_root,
        ds_version=ds_version,
        pyver=pyver,
        m_or_mu=m_or_mu,
        platform=plat,
        arch=arch,
    )


def main():
version_file = Path(__file__).parent / 'VERSION'
with open(str(version_file)) as fin:
version = fin.read().strip()

decoder_pkg_url = get_decoder_pkg_url(version)

install_requires_base = [
'tensorflow == 1.15.2',
'numpy == 1.18.1',
'progressbar2',
'six',
'pyxdg',
'attrdict',
'absl-py',
'semver',
'opuslib == 2.0.0',
'optuna',
'sox',
'bs4',
'pandas',
'requests',
'librosa',
'soundfile',
]

# Due to pip craziness environment variables are the only consistent way to
# get options into this script when doing `pip install`.
tc_decoder_artifacts_root = os.environ.get('DECODER_ARTIFACTS_ROOT', '')
if tc_decoder_artifacts_root:
# We're running inside the TaskCluster environment, override the decoder
# package URL with the one we just built.
decoder_pkg_url = get_decoder_pkg_url(version, tc_decoder_artifacts_root)
install_requires = install_requires_base + [decoder_pkg_url]
elif os.environ.get('DS_NODECODER', ''):
install_requires = install_requires_base
else:
install_requires = install_requires_base + [decoder_pkg_url]

setup(
name='deepspeech_training',
version=version,
Expand All @@ -28,24 +110,7 @@ def main():
package_dir={'': 'training'},
packages=find_packages(where='training'),
python_requires='>=3.5, <4',
install_requires=[
'tensorflow == 1.15.2',
'numpy == 1.18.1',
'progressbar2',
'six',
'pyxdg',
'attrdict',
'absl-py',
'semver',
'opuslib == 2.0.0',
'optuna',
'sox',
'bs4',
'pandas',
'requests',
'librosa',
'soundfile',
],
install_requires=install_requires,
# If there are data files included in your packages that need to be
# installed, specify them here.
package_data={
Expand Down
22 changes: 0 additions & 22 deletions taskcluster/tc-all-utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -122,25 +122,3 @@ verify_bazel_rebuild()
exit 1
fi;
}

# Sanity-check the decoder package URLs printed by util/taskcluster.py.
#
# Should be called from context where Python virtualenv is set. The script is
# invoked through a relative path, so the working directory must contain util/.
# NOTE(review): DS_VERSION is read but not set here - presumably exported by the
# calling environment; confirm.
#
# Each URL is piped through `grep -F`, so a matching URL is also echoed to
# stdout. Returns 0 only when all three checks match, 1 otherwise.
verify_ctcdecoder_url()
{
# No --branch given: the URL must reference the v${DS_VERSION} index.
default_url=$(python util/taskcluster.py --decoder)
echo "${default_url}" | grep -F "deepspeech.native_client.v${DS_VERSION}"
rc_default_url=$?

# Explicit tag: the branch name must appear unchanged in the URL.
tag_url=$(python util/taskcluster.py --decoder --branch 'v1.2.3')
echo "${tag_url}" | grep -F "deepspeech.native_client.v1.2.3"
rc_tag_url=$?

# Explicit 'master' branch: same expectation as for a tag.
master_url=$(python util/taskcluster.py --decoder --branch 'master')
echo "${master_url}" | grep -F "deepspeech.native_client.master"
rc_master_url=$?

# Succeed only if every pipeline above exited with status 0.
if [ ${rc_default_url} -eq 0 -a ${rc_tag_url} -eq 0 -a ${rc_master_url} -eq 0 ]; then
return 0
else
return 1
fi;
}
3 changes: 0 additions & 3 deletions taskcluster/tc-single-shot-inference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ pushd ${HOME}/DeepSpeech/ds
popd
set +o pipefail

decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}")
LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: --upgrade ${decoder_pkg_url} | cat

pushd ${HOME}/DeepSpeech/ds/
time ./bin/run-tc-ldc93s1_singleshotinference.sh
popd
Expand Down
7 changes: 0 additions & 7 deletions taskcluster/tc-train-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@ pushd ${HOME}/DeepSpeech/ds
popd
set +o pipefail

pushd ${HOME}/DeepSpeech/ds/
verify_ctcdecoder_url
popd

decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}")
LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat

# Prepare correct arguments for training
case "${bitrate}" in
8k)
Expand Down
9 changes: 1 addition & 8 deletions taskcluster/tc-transfer-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,10 @@ virtualenv_activate "${pyalias}" "deepspeech"
set -o pipefail
pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
pushd ${HOME}/DeepSpeech/ds
pip install --upgrade . | cat
DS_NODECODER=1 pip install --upgrade . | cat
popd
set +o pipefail

pushd ${HOME}/DeepSpeech/ds/
verify_ctcdecoder_url
popd

decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}")
LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat

pushd ${HOME}/DeepSpeech/ds/
time ./bin/run-tc-transfer.sh
popd
Expand Down
28 changes: 0 additions & 28 deletions training/deepspeech_training/util/taskcluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,6 @@ def main():
help='Name of the TaskCluster scheme to use.')
parser.add_argument('--branch', required=False,
help='Branch name to use. Defaulting to current content of VERSION file.')
parser.add_argument('--decoder', action='store_true',
help='Get URL to ds_ctcdecoder Python package.')

args = parser.parse_args()

Expand Down Expand Up @@ -119,32 +117,6 @@ def main():
else:
ds_version = parse_version(args.branch)

if args.decoder:
plat = platform.system().lower()
arch = platform.machine()

if plat == 'linux' and arch == 'x86_64':
plat = 'manylinux1'

if plat == 'darwin':
plat = 'macosx_10_10'

m_or_mu = 'mu' if is_ucs2 else 'm'
pyver = ''.join(map(str, sys.version_info[0:2]))

artifact = "ds_ctcdecoder-{ds_version}-cp{pyver}-cp{pyver}{m_or_mu}-{platform}_{arch}.whl".format(
ds_version=ds_version,
pyver=pyver,
m_or_mu=m_or_mu,
platform=plat,
arch=arch
)

ctc_arch = args.arch + '-ctc'

print(get_tc_url(ctc_arch, artifact, args.branch))
sys.exit(0)

if args.source is not None:
if args.source in DEFAULT_SCHEMES:
global TASKCLUSTER_SCHEME
Expand Down

0 comments on commit c428acf

Please sign in to comment.