From 16cecf5954a5f861b47e68a1b1a23af2cef58c39 Mon Sep 17 00:00:00 2001 From: ZailiWang Date: Fri, 24 May 2024 11:59:36 +0800 Subject: [PATCH] upgrade IPEX runtime to r2.3 --- runtime/core/cmake/ipex.cmake | 23 +++++++++++++---------- runtime/ipex/CMakeLists.txt | 6 +++--- runtime/ipex/README.md | 22 ++++++++++++++++------ runtime/ipex/docker/Dockerfile | 4 ++-- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/runtime/core/cmake/ipex.cmake b/runtime/core/cmake/ipex.cmake index 14542f42eb..33a1147bc3 100644 --- a/runtime/core/cmake/ipex.cmake +++ b/runtime/core/cmake/ipex.cmake @@ -4,12 +4,15 @@ if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") message(FATAL_ERROR "Intel Extension For PyTorch supports only Linux for now") endif() +set(TORCH_VERSION "2.3.0") +set(IPEX_VERSION "2.3.0") + if(CXX11_ABI) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.1%2Bcpu.zip") - set(URL_HASH "SHA256=137a842d1cf1e9196b419390133a1623ef92f8f84dc7a072f95ada684f394afd") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip") + set(URL_HASH "SHA256=f60009d2a74b6c8bdb174e398c70d217b7d12a4d3d358cd1db0690b32f6e193b") else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.0.1%2Bcpu.zip") - set(URL_HASH "SHA256=90d50350fd24ce5cf9dfbf47888d0cfd9f943eb677f481b86fe1b8e90f7fda5d") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip") + set(URL_HASH "SHA256=6b78aff4e586991bb2e040c02b2cfd73bc740059b9d12bcc1c1d7b3c86d2ab88") endif() FetchContent_Declare(libtorch URL ${LIBTORCH_URL} @@ -19,13 +22,13 @@ FetchContent_MakeAvailable(libtorch) find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH) if(CXX11_ABI) - set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.100%2Bcpu.run") - set(URL_HASH "SHA256=f172d9ebc2ca0c39cc93bb395721194f79767e1bc3f82b13e1edc07d1530a600") - set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-cxx11-abi-2.0.100%2Bcpu.run") + set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-cxx11-abi-${IPEX_VERSION}%2Bcpu.run") + set(URL_HASH "SHA256=8aa3c7c37f5cc2cba450947ca04f565fccb86c3bb98f592142375cfb9016f0d6") + set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-cxx11-abi-${IPEX_VERSION}%2Bcpu.run") else() - set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-2.0.100%2Bcpu.run") - set(URL_HASH "SHA256=8392f965dd9b8f6c0712acbb805c7e560e4965a0ade279b47a5f5a8363888268") - set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-2.0.100%2Bcpu.run") + set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-${IPEX_VERSION}%2Bcpu.run") + set(URL_HASH "SHA256=fecb6244a6cd38ca2d73a45272a6ad8527d1ec2caca512d919daa80adb621814") + set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-${IPEX_VERSION}%2Bcpu.run") endif() FetchContent_Declare(intel_ext_pt URL ${LIBIPEX_URL} diff --git a/runtime/ipex/CMakeLists.txt b/runtime/ipex/CMakeLists.txt index c51ff02f9e..0b46931c8e 100644 --- a/runtime/ipex/CMakeLists.txt +++ b/runtime/ipex/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) project(wenet VERSION 0.1) -option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF) +option(CXX11_ABI "whether to use CXX11_ABI libtorch" ON) option(GRAPH_TOOLS "whether to build TLG graph tools" OFF) option(BUILD_TESTING "whether to build unit test" ON) @@ -21,7 +21,7 @@ set(FETCHCONTENT_BASE_DIR ${fc_base}) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -Ofast -mavx2 -mfma -pthread -fPIC") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Ofast -mavx2 -mfma -pthread -fPIC") # Include all dependency include(ipex) @@ -30,7 +30,7 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/kaldi ) - include(wetextprocessing) +include(wetextprocessing) # Build all libraries add_subdirectory(utils) diff --git a/runtime/ipex/README.md b/runtime/ipex/README.md index a755351207..c1dabc4762 100644 --- a/runtime/ipex/README.md +++ b/runtime/ipex/README.md @@ -1,6 +1,6 @@ ## WeNet Server (x86) ASR Demo With Intel® Extension for PyTorch\* Optimization -[Intel® Extension for PyTorch\*](https://github.com/intel/intel-extension-for-pytorch) (IPEX) extends [PyTorch\*](https://pytorch.org/) with up-to-date optimization features for extra performance boost on Intel hardware. The optimizations take advantage of AVX-512, Vector Neural Network Instructions (AVX512 VNNI) and Intel® Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel Xe Matrix Extensions (XMX) AI engines on Intel discrete GPUs. +[Intel® Extension for PyTorch\*](https://github.com/intel/intel-extension-for-pytorch) (IPEX) extends [PyTorch\*](https://pytorch.org/) with up-to-date optimization features for extra performance boost on Intel hardware. The optimizations take advantage of AVX-512, Vector Neural Network Instructions (AVX512 VNNI) and Intel® Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel Xe Matrix Extensions (XMX) AI engines on Intel discrete GPUs. In the following we are introducing how to accelerate WeNet model inference performance on Intel® CPU machines with the adoption of Intel® Extension for PyTorch\*. The adoption mainly includes the export of pretrained models with IPEX optimization, as well as the buildup of WeNet runtime executables with IPEX C++ SDK. The buildup can be processed from local source code, or directly build and run a docker container in which the runtime binaries are ready. @@ -39,7 +39,8 @@ docker run --rm -v $PWD/docker_resource:/home/wenet/runtime/ipex/docker_resource ``` * Step 4. Test in docker container -``` + +```sh cd /home/wenet/runtime/ipex export GLOG_logtostderr=1 export GLOG_v=2 @@ -57,15 +58,18 @@ model_dir=docker_resource/model * Step 1. Environment Setup. WeNet code cloning and default dependencies installation + ``` sh git clone https://github.com/wenet-e2e/wenet cd wenet pip install -r requirements.txt ``` + Upgrading of PyTorch and TorchAudio, followed by the installation of IPEX + ``` sh -pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu --force-reinstall -pip install intel_extension_for_pytorch==2.0.100 +pip install torch==2.3.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cpu --force-reinstall +pip install intel_extension_for_pytorch==2.3.0 ``` Installation of related tools: Intel® OpenMP and TCMalloc @@ -83,6 +87,7 @@ based on the package manager of your system. * Step 3. Export the pretrained model with IPEX optimization. For exporting FP32 runtime model + ``` sh source examples/aishell/s0/path.sh export OMP_NUM_THREADS=1 @@ -91,7 +96,9 @@ python wenet/bin/export_ipex.py \ --checkpoint \ --output_file ``` + If you have an Intel® 4th Generation Xeon (Sapphire Rapids) server, you can export a BF16 runtime model and get better performance by virtue of [AMX instructions](https://en.wikipedia.org/wiki/Advanced_Matrix_Extensions) + ``` sh source examples/aishell/s0/path.sh export OMP_NUM_THREADS=1 @@ -101,7 +108,9 @@ python wenet/bin/export_ipex.py \ --output_file \ --dtype bf16 ``` + And for exporting int8 quantized runtime model + ``` sh source examples/aishell/s0/path.sh export OMP_NUM_THREADS=1 @@ -132,6 +141,7 @@ ipexrun --no-python \ --model_path $model_dir/ \ --unit_path $model_dir/units.txt 2>&1 | tee log.txt ``` -NOTE: Please refer [IPEX Launch Script Usage Guide](https://intel.github.io/intel-extension-for-pytorch/cpu/2.0.100+cpu/tutorials/performance_tuning/launch_script.html) for usage of advanced features. -For advanced usage of WeNet, such as building Web/RPC/HTTP services, please refer [LibTorch Tutorial](../libtorch#advanced-usage). The difference is that the executables should be invoked via IPEX launch script `ipexrun`. \ No newline at end of file +NOTE: Please refer [IPEX Launch Script Usage Guide](https://intel.github.io/intel-extension-for-pytorch/cpu/2.3.0+cpu/tutorials/performance_tuning/launch_script.html) for usage of advanced features. + +For advanced usage of WeNet, such as building Web/RPC/HTTP services, please refer [LibTorch Tutorial](../libtorch#advanced-usage). The difference is that the executables should be invoked via IPEX launch script `ipexrun`. diff --git a/runtime/ipex/docker/Dockerfile b/runtime/ipex/docker/Dockerfile index 854a33ceb7..184872ba48 100644 --- a/runtime/ipex/docker/Dockerfile +++ b/runtime/ipex/docker/Dockerfile @@ -2,8 +2,8 @@ FROM ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y git cmake wget build-essential python-is-python3 python3-pip google-perftools -RUN pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu -RUN pip install intel_extension_for_pytorch==2.0.100 pyyaml six intel-openmp +RUN pip install torch==2.3.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cpu +RUN pip install intel_extension_for_pytorch==2.3.0 pyyaml six intel-openmp RUN ln -s /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 /usr/lib/x86_64-linux-gnu/libtcmalloc.so RUN git clone https://github.com/wenet-e2e/wenet.git /home/wenet