Skip to content

Commit

Permalink
Add FL Decoder / KenLM integration to build process (#2078)
Browse files Browse the repository at this point in the history
Summary:
Now that all the C++ code from #2072 has been added, this commit enables the decoder/KenLM integration in the build process.

Pull Request resolved: #2078

Reviewed By: carolineechen

Differential Revision: D33198183

Pulled By: mthrok

fbshipit-source-id: 9d7fa76151d06fbbac3785183c7c2ff9862d3128
  • Loading branch information
mthrok authored and facebook-github-bot committed Dec 18, 2021
1 parent 396d16c commit 246dd52
Show file tree
Hide file tree
Showing 12 changed files with 109 additions and 471 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ endif()
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RNNT "Enable RNN transducer" ON)
option(BUILD_KENLM "Build KenLM statically" ON)
option(BUILD_CTC_DECODER "Build Flashlight CTC decoder" ON)
option(BUILD_TORCHAUDIO_PYTHON_EXTENSION "Build Python extension" OFF)
option(USE_CUDA "Enable CUDA support" OFF)
option(USE_ROCM "Enable ROCM support" OFF)
Expand Down
2 changes: 1 addition & 1 deletion third_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ set_property(GLOBAL PROPERTY TORCHAUDIO_THIRD_PARTIES "${TORCHAUDIO_THIRD_PARTIE
################################################################################
# KenLM
################################################################################
if (BUILD_KENLM)
if (BUILD_CTC_DECODER)
add_subdirectory(zlib)
add_subdirectory(bzip2)
add_subdirectory(lzma)
Expand Down
16 changes: 6 additions & 10 deletions third_party/boost/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
include(ExternalProject)

set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../install)
set(INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../install/include)
set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives)

ExternalProject_Add(boost_

ExternalProject_Add(boost
PREFIX ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_DIR ${ARCHIVE_DIR}
URL https://boostorg.jfrog.io/artifactory/main/release/1.78.0/source/boost_1_78_0.tar.gz
URL_HASH SHA256=94ced8b72956591c4775ae2207a9763d3600b30d9d7446562c552f0a14a63be7
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ./bootstrap.sh --with-libraries=thread --prefix=${INSTALL_DIR}
BUILD_COMMAND ./b2 link=static
INSTALL_COMMAND ./b2 link=static install
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory boost ${INCLUDE_DIR}/boost
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
Expand All @@ -21,8 +22,3 @@ ExternalProject_Add(boost_
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)

add_library(boost INTERFACE)
add_dependencies(boost boost_)
target_include_directories(boost INTERFACE ${INSTALL_DIR}/include)
target_link_libraries(boost INTERFACE ${INSTALL_DIR}/lib/libboost_thread.a)
13 changes: 10 additions & 3 deletions third_party/bzip2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@ include(ExternalProject)
set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../install)
set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives)

ExternalProject_Add(bzip2_
set(
BZIP_LIBRARIES
${INSTALL_DIR}/lib/libbz2.a
)

ExternalProject_Add(bzip2-
PREFIX ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_DIR ${ARCHIVE_DIR}
URL https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz
URL_HASH SHA256=ab5a03176ee106d3f0fa90e381da478ddae405918153cca248e682cd0c4a2269
BUILD_BYPRODUCTS ${BZIP_LIBRARIES}
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND make VERBOSE=1 "CFLAGS=-fPIC -fvisibility=hidden -Wall -Winline -O2 -g -D_FILE_OFFSET_BITS=64"
INSTALL_COMMAND make install PREFIX=${INSTALL_DIR}
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
Expand All @@ -22,6 +29,6 @@ ExternalProject_Add(bzip2_
)

add_library(bzip2 INTERFACE)
add_dependencies(bzip2 bzip2_)
add_dependencies(bzip2 bzip2-)
target_include_directories(bzip2 INTERFACE ${INSTALL_DIR}/include)
target_link_libraries(bzip2 INTERFACE ${INSTALL_DIR}/lib/libbz2.a)
target_link_libraries(bzip2 INTERFACE ${BZIP_LIBRARIES})
17 changes: 12 additions & 5 deletions third_party/lzma/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,22 @@ set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives)
set(envs
"PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig"
"LDFLAGS=-L${INSTALL_DIR}/lib $ENV{LDFLAGS}"
"CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}"
"CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden -fPIC $ENV{CFLAGS}"
)

ExternalProject_Add(lzma_
set(
LZMA_LIBRARIES
${INSTALL_DIR}/lib/liblzma.a
)

ExternalProject_Add(lzma-
PREFIX ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_DIR ${ARCHIVE_DIR}
URL https://tukaani.org/xz/xz-5.2.5.tar.gz
URL_HASH SHA256=f6f4910fd033078738bd82bfba4f49219d03b17eb0794eb91efbae419f4aba10
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/lzma_/configure --prefix=${INSTALL_DIR} --disable-xz --disable-xzdec --disable-lzmadec --disable-lzmainfo --disable-lzma-links --disable-scripts --disable-doc --enable-static --disable-shared
BUILD_BYPRODUCTS ${LZMA_LIBRARIES}
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/lzma-/configure --prefix=${INSTALL_DIR} --disable-xz --disable-xzdec --disable-lzmadec --disable-lzmainfo --disable-lzma-links --disable-scripts --disable-doc --enable-static --disable-shared
BUILD_COMMAND ${CMAKE_COMMAND} -E env ${envs} make VERBOSE=1
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
Expand All @@ -31,6 +38,6 @@ ExternalProject_Add(lzma_


add_library(lzma INTERFACE)
add_dependencies(lzma lzma_)
add_dependencies(lzma lzma-)
target_include_directories(lzma INTERFACE ${INSTALL_DIR}/include)
target_link_libraries(lzma INTERFACE ${INSTALL_DIR}/lib/liblzma.a)
target_link_libraries(lzma INTERFACE ${LZMA_LIBRARIES})
18 changes: 12 additions & 6 deletions third_party/zlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,23 @@ set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives)
set(envs
"PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig"
"LDFLAGS=-L${INSTALL_DIR}/lib $ENV{LDFLAGS}"
"CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}"
"CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden -fPIC $ENV{CFLAGS}"
"prefix=${INSTALL_DIR}"
)

ExternalProject_Add(zlib_
set(
ZLIB_LIBRARIES
${INSTALL_DIR}/lib/libz.a
)

ExternalProject_Add(zlib-
PREFIX ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_DIR ${ARCHIVE_DIR}
URL https://zlib.net/zlib-1.2.11.tar.gz
URL_HASH SHA256=c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/zlib_/configure --static
DOWNLOAD_NO_PROGRESS ON
BUILD_BYPRODUCTS ${ZLIB_LIBRARIES}
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/zlib-/configure --static
BUILD_COMMAND ${CMAKE_COMMAND} -E env ${envs} make VERBOSE=1
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
Expand All @@ -31,6 +37,6 @@ ExternalProject_Add(zlib_
)

add_library(zlib INTERFACE)
add_dependencies(zlib zlib_)
add_dependencies(zlib zlib-)
target_include_directories(zlib INTERFACE ${INSTALL_DIR}/include)
target_link_libraries(zlib INTERFACE ${INSTALL_DIR}/lib/libz.a)
target_link_libraries(zlib INTERFACE ${ZLIB_LIBRARIES})
12 changes: 9 additions & 3 deletions tools/setup_helpers/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _get_build(var, default=False):
_BUILD_SOX = False if platform.system() == 'Windows' else _get_build("BUILD_SOX", True)
_BUILD_KALDI = False if platform.system() == 'Windows' else _get_build("BUILD_KALDI", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True)
_BUILD_KENLM = False if platform.system() == 'Windows' else _get_build("BUILD_KENLM", True)
_BUILD_CTC_DECODER = False if platform.system() == 'Windows' else _get_build("BUILD_CTC_DECODER", True)
_USE_ROCM = _get_build("USE_ROCM", torch.cuda.is_available() and torch.version.hip is not None)
_USE_CUDA = _get_build("USE_CUDA", torch.cuda.is_available() and torch.version.hip is None)
_USE_OPENMP = _get_build("USE_OPENMP", True) and \
Expand All @@ -46,10 +46,16 @@ def _get_build(var, default=False):


def get_ext_modules():
return [
modules = [
Extension(name='torchaudio.lib.libtorchaudio', sources=[]),
Extension(name='torchaudio._torchaudio', sources=[]),
]
if _BUILD_CTC_DECODER:
modules.extend([
Extension(name='torchaudio.lib.libtorchaudio_decoder', sources=[]),
Extension(name='torchaudio._torchaudio_decoder', sources=[]),
])
return modules


# Based off of
Expand Down Expand Up @@ -90,7 +96,7 @@ def build_extension(self, ext):
f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
f"-DBUILD_KENLM:BOOL={'ON' if _BUILD_KENLM else 'OFF'}",
f"-DBUILD_CTC_DECODER:BOOL={'ON' if _BUILD_CTC_DECODER else 'OFF'}",
"-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON",
f"-DUSE_ROCM:BOOL={'ON' if _USE_ROCM else 'OFF'}",
f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
Expand Down
57 changes: 57 additions & 0 deletions torchaudio/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,51 @@ define_library(
"${LIBTORCHAUDIO_COMPILE_DEFINITIONS}"
)


################################################################################
# libtorchaudio_decoder.so
################################################################################
if(BUILD_CTC_DECODER)
  # Files that make up the decoder library. Paths are relative to this
  # directory; headers are listed next to their implementation files.
  set(
    LIBTORCHAUDIO_DECODER_SOURCES
    # decoder/src/decoder
    decoder/src/decoder/Decoder.h
    decoder/src/decoder/LexiconDecoder.cpp
    decoder/src/decoder/LexiconDecoder.h
    decoder/src/decoder/Trie.cpp
    decoder/src/decoder/Trie.h
    decoder/src/decoder/Utils.cpp
    decoder/src/decoder/Utils.h
    # decoder/src/decoder/lm
    decoder/src/decoder/lm/KenLM.cpp
    decoder/src/decoder/lm/KenLM.h
    decoder/src/decoder/lm/LM.h
    # decoder/src/dictionary
    decoder/src/dictionary/Defines.h
    decoder/src/dictionary/Dictionary.cpp
    decoder/src/dictionary/Dictionary.h
    decoder/src/dictionary/String.cpp
    decoder/src/dictionary/String.h
    decoder/src/dictionary/System.cpp
    decoder/src/dictionary/System.h
    decoder/src/dictionary/Utils.cpp
    decoder/src/dictionary/Utils.h
  )

  # Extra definition passed for decoder code, appended below to the common
  # LIBTORCHAUDIO_COMPILE_DEFINITIONS list.
  set(LIBTORCHAUDIO_DECODER_DEFINITIONS BUILD_CTC_DECODER)

  # Targets handed to define_library as dependencies of the decoder.
  set(LIBTORCHAUDIO_DECODER_DEPS libtorchaudio kenlm)

  # define_library is a project helper defined outside this section.
  # NOTE(review): the argument order appears to be name, sources, include
  # directory, dependencies, compile definitions -- confirm against the helper.
  define_library(
    libtorchaudio_decoder
    "${LIBTORCHAUDIO_DECODER_SOURCES}"
    "${PROJECT_SOURCE_DIR}"
    "${LIBTORCHAUDIO_DECODER_DEPS}"
    "${LIBTORCHAUDIO_COMPILE_DEFINITIONS};${LIBTORCHAUDIO_DECODER_DEFINITIONS}"
  )
endif()

# TODO: Add libtorchaudio_decoder
if (APPLE)
set(TORCHAUDIO_LIBRARY libtorchaudio CACHE INTERNAL "")
else()
Expand Down Expand Up @@ -195,4 +240,16 @@ if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
libtorchaudio
"${LIBTORCHAUDIO_COMPILE_DEFINITIONS}"
)
if (BUILD_CTC_DECODER)
  # Binding source for the decoder extension module.
  set(DECODER_EXTENSION_SOURCES decoder/bindings/pybind.cpp)

  # define_extension is a project helper defined outside this section.
  # NOTE(review): the arguments appear to be name, sources, libraries to link
  # and compile definitions -- confirm against the helper.
  # LIBTORCHAUDIO_DECODER_DEFINITIONS is set where libtorchaudio_decoder is
  # defined, under the same BUILD_CTC_DECODER condition.
  define_extension(
    _torchaudio_decoder
    "${DECODER_EXTENSION_SOURCES}"
    "libtorchaudio;libtorchaudio_decoder"
    "${LIBTORCHAUDIO_DECODER_DEFINITIONS}"
  )
endif()
endif()
19 changes: 0 additions & 19 deletions torchaudio/csrc/decoder/bindings/cmake/Buildpybind11.cmake

This file was deleted.

Loading

0 comments on commit 246dd52

Please sign in to comment.