From 16cecf5954a5f861b47e68a1b1a23af2cef58c39 Mon Sep 17 00:00:00 2001
From: ZailiWang <zaili.wang@intel.com>
Date: Fri, 24 May 2024 11:59:36 +0800
Subject: [PATCH] upgrade IPEX runtime to r2.3

---
 runtime/core/cmake/ipex.cmake  | 23 +++++++++++++----------
 runtime/ipex/CMakeLists.txt    |  6 +++---
 runtime/ipex/README.md         | 22 ++++++++++++++++------
 runtime/ipex/docker/Dockerfile |  4 ++--
 4 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/runtime/core/cmake/ipex.cmake b/runtime/core/cmake/ipex.cmake
index 14542f42eb..33a1147bc3 100644
--- a/runtime/core/cmake/ipex.cmake
+++ b/runtime/core/cmake/ipex.cmake
@@ -4,12 +4,15 @@ if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
   message(FATAL_ERROR "Intel Extension For PyTorch supports only Linux for now")
 endif()
 
+set(TORCH_VERSION "2.3.0")  # libtorch release interpolated into LIBTORCH_URL below; refresh the libtorch URL_HASH values when bumping
+set(IPEX_VERSION "2.3.0")  # IPEX release interpolated into LIBIPEX_URL and LIBIPEX_SCRIPT_NAME below; refresh the IPEX URL_HASH values when bumping
+
 if(CXX11_ABI)
-  set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.1%2Bcpu.zip")
-  set(URL_HASH "SHA256=137a842d1cf1e9196b419390133a1623ef92f8f84dc7a072f95ada684f394afd")
+  set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip")
+  set(URL_HASH "SHA256=f60009d2a74b6c8bdb174e398c70d217b7d12a4d3d358cd1db0690b32f6e193b")
 else()
-  set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.0.1%2Bcpu.zip")
-  set(URL_HASH "SHA256=90d50350fd24ce5cf9dfbf47888d0cfd9f943eb677f481b86fe1b8e90f7fda5d")
+  set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcpu.zip")
+  set(URL_HASH "SHA256=6b78aff4e586991bb2e040c02b2cfd73bc740059b9d12bcc1c1d7b3c86d2ab88")
 endif()
 FetchContent_Declare(libtorch
 URL      ${LIBTORCH_URL}
@@ -19,13 +22,13 @@ FetchContent_MakeAvailable(libtorch)
 find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
 
 if(CXX11_ABI)
-  set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-cxx11-abi-2.0.100%2Bcpu.run")
-  set(URL_HASH "SHA256=f172d9ebc2ca0c39cc93bb395721194f79767e1bc3f82b13e1edc07d1530a600")
-  set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-cxx11-abi-2.0.100%2Bcpu.run")
+  set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-cxx11-abi-${IPEX_VERSION}%2Bcpu.run")
+  set(URL_HASH "SHA256=8aa3c7c37f5cc2cba450947ca04f565fccb86c3bb98f592142375cfb9016f0d6")
+  set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-cxx11-abi-${IPEX_VERSION}%2Bcpu.run")
 else()
-  set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-2.0.100%2Bcpu.run")
-  set(URL_HASH "SHA256=8392f965dd9b8f6c0712acbb805c7e560e4965a0ade279b47a5f5a8363888268")
-  set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-2.0.100%2Bcpu.run")
+  set(LIBIPEX_URL "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/libipex/cpu/libintel-ext-pt-${IPEX_VERSION}%2Bcpu.run")
+  set(URL_HASH "SHA256=fecb6244a6cd38ca2d73a45272a6ad8527d1ec2caca512d919daa80adb621814")
+  set(LIBIPEX_SCRIPT_NAME "libintel-ext-pt-${IPEX_VERSION}%2Bcpu.run")
 endif()
 FetchContent_Declare(intel_ext_pt
 URL                  ${LIBIPEX_URL}
diff --git a/runtime/ipex/CMakeLists.txt b/runtime/ipex/CMakeLists.txt
index c51ff02f9e..0b46931c8e 100644
--- a/runtime/ipex/CMakeLists.txt
+++ b/runtime/ipex/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
 
 project(wenet VERSION 0.1)
 
-option(CXX11_ABI "whether to use CXX11_ABI libtorch" OFF)
+option(CXX11_ABI "whether to use CXX11_ABI libtorch" ON)
 option(GRAPH_TOOLS "whether to build TLG graph tools" OFF)
 option(BUILD_TESTING "whether to build unit test" ON)
 
@@ -21,7 +21,7 @@ set(FETCHCONTENT_BASE_DIR ${fc_base})
 
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -Ofast -mavx2 -mfma -pthread -fPIC")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Ofast -mavx2 -mfma -pthread -fPIC")
 
 # Include all dependency
 include(ipex)
@@ -30,7 +30,7 @@ include_directories(
   ${CMAKE_CURRENT_SOURCE_DIR}
   ${CMAKE_CURRENT_SOURCE_DIR}/kaldi
 )
- include(wetextprocessing)
+include(wetextprocessing)
 
 # Build all libraries
 add_subdirectory(utils)
diff --git a/runtime/ipex/README.md b/runtime/ipex/README.md
index a755351207..c1dabc4762 100644
--- a/runtime/ipex/README.md
+++ b/runtime/ipex/README.md
@@ -1,6 +1,6 @@
 ## WeNet Server (x86) ASR Demo With Intel® Extension for PyTorch\* Optimization
 
-[Intel® Extension for PyTorch\*](https://github.com/intel/intel-extension-for-pytorch) (IPEX) extends [PyTorch\*](https://pytorch.org/) with up-to-date  optimization features for extra performance boost on Intel hardware. The optimizations take advantage of AVX-512, Vector Neural Network Instructions (AVX512 VNNI) and Intel® Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel X<sup>e</sup> Matrix Extensions (XMX) AI engines on Intel discrete GPUs.
+[Intel® Extension for PyTorch\*](https://github.com/intel/intel-extension-for-pytorch) (IPEX) extends [PyTorch\*](https://pytorch.org/) with up-to-date optimization features for extra performance boost on Intel hardware. The optimizations take advantage of AVX-512, Vector Neural Network Instructions (AVX512 VNNI) and Intel® Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel X<sup>e</sup> Matrix Extensions (XMX) AI engines on Intel discrete GPUs.
 
 In the following we are introducing how to accelerate WeNet model inference performance on Intel® CPU machines with the adoption of Intel® Extension for PyTorch\*. The adoption mainly includes the export of pretrained models with IPEX optimization, as well as the buildup of WeNet runtime executables with IPEX C++ SDK. The buildup can be processed from local source code, or directly build and run a docker container in which the runtime binaries are ready.
 
@@ -39,7 +39,8 @@ docker run --rm -v $PWD/docker_resource:/home/wenet/runtime/ipex/docker_resource
 ```
 
 * Step 4. Test in docker container
-```
+
+```sh
 cd /home/wenet/runtime/ipex
 export GLOG_logtostderr=1
 export GLOG_v=2
@@ -57,15 +58,18 @@ model_dir=docker_resource/model
 * Step 1. Environment Setup.
 
 WeNet code cloning and default dependencies installation
+
 ``` sh
 git clone https://github.com/wenet-e2e/wenet
 cd wenet
 pip install -r requirements.txt
 ```
+
 Upgrading of PyTorch and TorchAudio, followed by the installation of IPEX
+
 ``` sh
-pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu --force-reinstall
-pip install intel_extension_for_pytorch==2.0.100
+pip install torch==2.3.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cpu --force-reinstall
+pip install intel_extension_for_pytorch==2.3.0
 ```
 
 Installation of related tools: Intel® OpenMP and TCMalloc
@@ -83,6 +87,7 @@ based on the package manager of your system.
 * Step 3. Export the pretrained model with IPEX optimization.
 
 For exporting FP32 runtime model
+
 ``` sh
 source examples/aishell/s0/path.sh
 export OMP_NUM_THREADS=1
@@ -91,7 +96,9 @@ python wenet/bin/export_ipex.py \
     --checkpoint <model_ckpt_filename> \
     --output_file <runtime_model_output_filename>
 ```
+
 If you have an Intel® 4th Generation Xeon (Sapphire Rapids) server, you can export a BF16 runtime model and get better performance by virtue of [AMX instructions](https://en.wikipedia.org/wiki/Advanced_Matrix_Extensions)
+
 ``` sh
 source examples/aishell/s0/path.sh
 export OMP_NUM_THREADS=1
@@ -101,7 +108,9 @@ python wenet/bin/export_ipex.py \
     --output_file <runtime_model_output_filename> \
     --dtype bf16
 ```
+
 And for exporting int8 quantized runtime model
+
 ``` sh
 source examples/aishell/s0/path.sh
 export OMP_NUM_THREADS=1
@@ -132,6 +141,7 @@ ipexrun --no-python \
         --model_path $model_dir/<runtime_model_filename> \
         --unit_path $model_dir/units.txt 2>&1 | tee log.txt
 ```
-NOTE: Please refer [IPEX Launch Script Usage Guide](https://intel.github.io/intel-extension-for-pytorch/cpu/2.0.100+cpu/tutorials/performance_tuning/launch_script.html) for usage of advanced features.
 
-For advanced usage of WeNet, such as building Web/RPC/HTTP services, please refer [LibTorch Tutorial](../libtorch#advanced-usage). The difference is that the executables should be invoked via IPEX launch script `ipexrun`.
\ No newline at end of file
+NOTE: Please refer to the [IPEX Launch Script Usage Guide](https://intel.github.io/intel-extension-for-pytorch/cpu/2.3.0+cpu/tutorials/performance_tuning/launch_script.html) for usage of advanced features.
+
+For advanced usage of WeNet, such as building Web/RPC/HTTP services, please refer to the [LibTorch Tutorial](../libtorch#advanced-usage). The difference is that the executables should be invoked via the IPEX launch script `ipexrun`.
diff --git a/runtime/ipex/docker/Dockerfile b/runtime/ipex/docker/Dockerfile
index 854a33ceb7..184872ba48 100644
--- a/runtime/ipex/docker/Dockerfile
+++ b/runtime/ipex/docker/Dockerfile
@@ -2,8 +2,8 @@ FROM ubuntu:22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && apt-get install -y git cmake wget build-essential python-is-python3 python3-pip google-perftools
-RUN pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu
-RUN pip install intel_extension_for_pytorch==2.0.100 pyyaml six intel-openmp
+RUN pip install torch==2.3.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cpu
+RUN pip install intel_extension_for_pytorch==2.3.0 pyyaml six intel-openmp
 RUN ln -s /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 /usr/lib/x86_64-linux-gnu/libtcmalloc.so
 
 RUN git clone https://github.com/wenet-e2e/wenet.git /home/wenet