WIP remove old CTC code

mozilla · Nov 8, 2018 · c5d6c6d · c5d6c6d
1 parent cf4ba3d
commit c5d6c6d
Show file tree

Hide file tree

Showing 130 changed files with 1,146 additions and 14,030 deletions.
diff --git a/.compute b/.compute
@@ -21,5 +21,4 @@ python3 -u DeepSpeech.py \
   --display_step 0 \
   --validation_step 1 \
   --checkpoint_dir "../keep" \
-  --summary_dir "../keep/summaries" \
-  --decoder_library_path "../tmp/native_client/libctc_decoder_with_kenlm.so"
+  --summary_dir "../keep/summaries"
diff --git a/.gitattributes b/.gitattributes
@@ -1,4 +1,3 @@
 *.binary filter=lfs diff=lfs merge=lfs -crlf
 data/lm/trie filter=lfs diff=lfs merge=lfs -crlf
 data/lm/vocab.txt filter=lfs diff=lfs merge=lfs -text
-data/lm/trie.ctcdecode filter=lfs diff=lfs merge=lfs -text
diff --git a/.install b/.install
@@ -7,4 +7,10 @@ pip install tensorflow-gpu==1.12.0rc2
 
 python3 util/taskcluster.py --arch gpu --target ../tmp/native_client
 
+# Install ds_ctcdecoder package from TaskCluster
+VERSION=$(python -c 'import pkg_resources; print(pkg_resources.safe_version(open("VERSION").read()))')
+PYVER=$(python -c 'import sys; print("cp{0}{1}-cp{0}{1}m".format(sys.version_info.major, sys.version_info.minor))')
+python3 util/taskcluster.py --arch cpu --target ../tmp --artifact "ds_ctcdecoder-${VERSION}-${PYVER}-manylinux1_x86_64.whl"
+pip install ../tmp/ds_ctcdecoder-*.whl
+
 mkdir -p ../keep/summaries
diff --git a/DeepSpeech.py b/DeepSpeech.py
diff --git a/Dockerfile b/Dockerfile
@@ -165,9 +165,6 @@ RUN ./configure
 # Passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment
 
 
-# Build LM Prefix Decoder, CPU only - no need for CUDA flag
-RUN bazel build -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //native_client:libctc_decoder_with_kenlm.so  --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
-
 # Build DeepSpeech
 RUN bazel build --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_trie --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
 
@@ -184,8 +181,7 @@ RUN bazel build --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_G
 # RUN pip install /tmp/tensorflow_pkg/*.whl
 
 # Copy built libs to /DeepSpeech/native_client
-RUN cp /tensorflow/bazel-bin/native_client/libctc_decoder_with_kenlm.so /DeepSpeech/native_client/ \
-    && cp /tensorflow/bazel-bin/native_client/generate_trie /DeepSpeech/native_client/ \
+RUN cp /tensorflow/bazel-bin/native_client/generate_trie /DeepSpeech/native_client/ \
     && cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
 
 # Install TensorFlow
@@ -200,6 +196,9 @@ RUN make deepspeech
 WORKDIR /DeepSpeech/native_client/python
 RUN make bindings
 RUN pip install dist/deepspeech*
+WORKDIR /DeepSpeech/native_client/ctcdecode
+RUN make
+RUN pip install dist/*.whl
 
 
 # << END Build and bind

diff --git a/bin/run-tc-ldc93s1_checkpoint.sh b/bin/run-tc-ldc93s1_checkpoint.sh
@@ -19,7 +19,6 @@ python -u DeepSpeech.py --noshow_progressbar \
   --n_hidden 494 --epoch -1 --random_seed 4567 --default_stddev 0.046875 \
   --max_to_keep 1 --checkpoint_dir '/tmp/ckpt' \
   --learning_rate 0.001 --dropout_rate 0.05 \
-  --decoder_library_path '/tmp/ds/libctc_decoder_with_kenlm.so' \
   --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \
   --lm_trie_path 'data/smoke_test/vocab.trie' | tee /tmp/resume.log
 

diff --git a/bin/run-tc-ldc93s1_new.sh b/bin/run-tc-ldc93s1_new.sh
@@ -20,6 +20,5 @@ python -u DeepSpeech.py \
   --default_stddev 0.046875 --max_to_keep 1 \
   --checkpoint_dir '/tmp/ckpt' \
   --learning_rate 0.001 --dropout_rate 0.05  --export_dir '/tmp/train' \
-  --decoder_library_path '/tmp/ds/libctc_decoder_with_kenlm.so' \
   --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \
   --lm_trie_path 'data/smoke_test/vocab.trie' \
diff --git a/bin/run-tc-ldc93s1_singleshotinference.sh b/bin/run-tc-ldc93s1_singleshotinference.sh
@@ -17,7 +17,6 @@ python -u DeepSpeech.py \
   --n_hidden 494 --epoch 1 --random_seed 4567 --default_stddev 0.046875 \
   --max_to_keep 1 --checkpoint_dir '/tmp/ckpt' --checkpoint_secs 0 \
   --learning_rate 0.001 --dropout_rate 0.05 \
-  --decoder_library_path '/tmp/ds/libctc_decoder_with_kenlm.so' \
   --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \
   --lm_trie_path 'data/smoke_test/vocab.trie'
 
@@ -28,7 +27,6 @@ python -u DeepSpeech.py \
   --n_hidden 494 --epoch 1 --random_seed 4567 --default_stddev 0.046875 \
   --max_to_keep 1 --checkpoint_dir '/tmp/ckpt' --checkpoint_secs 0 \
   --learning_rate 0.001 --dropout_rate 0.05 \
-  --decoder_library_path '/tmp/ds/libctc_decoder_with_kenlm.so' \
   --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \
   --lm_trie_path 'data/smoke_test/vocab.trie' \
   --one_shot_infer 'data/smoke_test/LDC93S1.wav'
diff --git a/bin/run-tc-ldc93s1_tflite.sh b/bin/run-tc-ldc93s1_tflite.sh
@@ -14,7 +14,6 @@ python -u DeepSpeech.py \
   --n_hidden 494 \
   --checkpoint_dir '/tmp/ckpt' \
   --export_dir '/tmp/train' \
-  --decoder_library_path '/tmp/ds/libctc_decoder_with_kenlm.so' \
   --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \
   --lm_trie_path 'data/smoke_test/vocab.trie' \
   --notrain --notest \

diff --git a/data/lm/trie b/data/lm/trie
diff --git a/data/lm/trie.ctcdecode b/data/lm/trie.ctcdecode
diff --git a/data/lm/vocab.txt b/data/lm/vocab.txt
diff --git a/data/smoke_test/vocab.trie b/data/smoke_test/vocab.trie
diff --git a/data/smoke_test/vocab.trie.ctcdecode b/data/smoke_test/vocab.trie.ctcdecode
diff --git a/evaluate.py b/evaluate.py
@@ -15,8 +15,10 @@
 from attrdict import AttrDict
 from collections import namedtuple
 from ds_ctcdecoder import ctc_beam_search_decoder_batch, Scorer
-from DeepSpeech import initialize_globals, create_flags, log_debug, log_info, log_warn, log_error, create_inference_graph
-from multiprocessing import Pool
+from util.flags import create_flags
+from util.coordinator import C, initialize_globals
+from util.logging import log_debug, log_info, log_warn, log_error
+from multiprocessing import Pool, cpu_count
 from six.moves import zip, range
 from util.audio import audiofile_to_input_vector
 from util.text import Alphabet, ctc_label_dense_to_sparse, wer, levenshtein
@@ -86,10 +88,10 @@ def calculate_report(labels, decodings, distances, losses):
     return samples_wer, samples
 
 
-def evaluate(test_data, alphabet):
-    scorer = Scorer(FLAGS.lm_weight, FLAGS.valid_word_count_weight,
+def evaluate(test_data, inference_graph, alphabet):
+    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                     FLAGS.lm_binary_path, FLAGS.lm_trie_path,
-                    alphabet)
+                    C.alphabet)
 
 
     def create_windows(features):
@@ -110,7 +112,7 @@ def create_windows(features):
     test_data['features'] = test_data['features'].apply(create_windows)
 
     with tf.Session() as session:
-        inputs, outputs, layers = create_inference_graph(batch_size=FLAGS.test_batch_size, n_steps=-1)
+        inputs, outputs, layers = inference_graph
 
         # Transpose to batch major for decoder
         transposed = tf.transpose(outputs['outputs'], [1, 0, 2])
@@ -172,7 +174,10 @@ def create_windows(features):
                                       widget=progressbar.AdaptiveETA)
 
         # Get number of accessible CPU cores for this process
-        num_processes = len(os.sched_getaffinity(0))
+        try:
+            num_processes = cpu_count()
+        except:
+            num_processes = 1
 
         # Second pass, decode logits and compute WER and edit distance metrics
         for logits, batch in bar(zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
@@ -226,7 +231,10 @@ def main(_):
         by="features_len",
         ascending=False)
 
-    samples = evaluate(test_data, alphabet)
+    from DeepSpeech import create_inference_graph
+    graph = create_inference_graph(batch_size=FLAGS.test_batch_size, n_steps=-1)
+
+    samples = evaluate(test_data, graph, alphabet)
 
     if FLAGS.test_output_file:
         # Save decoded tuples as JSON, converting NumPy floats to Python floats

diff --git a/native_client/BUILD b/native_client/BUILD
@@ -12,11 +12,10 @@ genrule(
 
 KENLM_SOURCES = glob(["kenlm/lm/*.cc", "kenlm/util/*.cc", "kenlm/util/double-conversion/*.cc",
                       "kenlm/lm/*.hh", "kenlm/util/*.hh", "kenlm/util/double-conversion/*.h"],
-                     exclude = ["kenlm/*/*test.cc", "kenlm/*/*main.cc"]) + glob(["boost_locale/**/*.hpp"])
+                     exclude = ["kenlm/*/*test.cc", "kenlm/*/*main.cc"])
 
 KENLM_INCLUDES = [
     "kenlm",
-    "boost_locale"
 ]
 
 DECODER_SOURCES = glob([
@@ -102,24 +101,6 @@ tf_cc_shared_object(
     defines = ["KENLM_MAX_ORDER=6"],
 )
 
-tf_cc_shared_object(
-    name = "libctc_decoder_with_kenlm.so",
-    srcs = [
-            "beam_search.cc",
-            "beam_search.h",
-            "alphabet.h",
-            "trie_node.h"
-           ] +
-           KENLM_SOURCES,
-    includes = KENLM_INCLUDES,
-    copts = ["-std=c++11"],
-    defines = ["KENLM_MAX_ORDER=6"],
-    deps = ["//tensorflow/core:framework_headers_lib",
-            "//tensorflow/core/util/ctc",
-            "//third_party/eigen3",
-    ],
-)
-
 cc_binary(
     name = "generate_trie",
     srcs = [

diff --git a/native_client/README.md b/native_client/README.md
@@ -19,8 +19,6 @@ This will download and extract `native_client.tar.xz` which includes the deepspe
 
 If you want the CUDA capable version of the binaries, use `--arch gpu`. Note that for now we don't publish CUDA-capable macOS binaries.
 
-If you're looking to train a model, you now have a `libctc_decoder_with_kenlm.so` file that you can pass to the `--decoder_library_path` parameter of `DeepSpeech.py`.
-
 ## Required Dependencies
 
 Running inference might require some runtime dependencies to be already installed on your system. Those should be the same, whatever the bindings you are using:
@@ -77,10 +75,9 @@ Before building the DeepSpeech client libraries, you will need to prepare your e
 Preferably, check out the version of TensorFlow that is currently supported by DeepSpeech (see requirements.txt), and use the Bazel version recommended by TensorFlow for that version.
 Then, follow the [instructions](https://www.tensorflow.org/install/install_sources) on the TensorFlow site for your platform, up to the end of 'Configure the installation'.
 
-After that, you can build the Tensorflow and DeepSpeech libraries using the following commands. Please note that the flags for `libctc_decoder_with_kenlm.so` differs a little bit.
+After that, you can build the TensorFlow and DeepSpeech libraries using the following command.
 
 ```
-bazel build -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" //native_client:libctc_decoder_with_kenlm.so
 bazel build --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_trie
 ```