FlagOpen · yuzhou03 · Sep 7, 2023 · Aug 31, 2023 · Aug 31, 2023 · Sep 1, 2023
diff --git a/training/kunlunxin/README.md b/training/kunlunxin/README.md
@@ -26,7 +26,10 @@ R480-X8基于多芯片间高速互联技术，单机可提供高达1 Peta Ops @F
   - OS版本：Ubuntu 20.04
   - OS kernel版本: 5.4.0-26-generic
   - 加速卡驱动版本：4.0.25
-  - Docker镜像和版本：pytorch1.12.1-cpu-ubuntu18.04:v0.04
+  - Docker镜像和版本：pytorch1.12.1-cpu-ubuntu20.04:v0.01
+  - 训练框架版本: xmlir+111e7d45 [xmlir下载](https://bd.bcebos.com/klx-pytorch-ipipe-bd/flagperf/111e7d45/xmlir-0.0.1-cp38-cp38-linux_x86_64.whl)
+  - 训练编译器版本: xacc+111e7d45 [xacc下载](https://bd.bcebos.com/klx-pytorch-ipipe-bd/flagperf/111e7d45/xacc-0.1.0-cp38-cp38-linux_x86_64.whl)
+  - 依赖软件版本：pytorch-1.12.1+cpu
 
 ## 容器镜像信息
 - 容器构建信息

diff --git a/training/kunlunxin/docker_image/pytorch/pytorch_install.sh b/training/kunlunxin/docker_image/pytorch/pytorch_install.sh
@@ -2,5 +2,6 @@
 
 set -xe
 
+
 pip install https://bd.bcebos.com/klx-pytorch-ipipe-bd/flagperf/latest/xacc-0.1.0-cp38-cp38-linux_x86_64.whl
 pip install https://bd.bcebos.com/klx-pytorch-ipipe-bd/flagperf/latest/xmlir-0.0.1-cp38-cp38-linux_x86_64.whl
diff --git a/training/kunlunxin/glm-pytorch/config/config_R300x1x1.py b/training/kunlunxin/glm-pytorch/config/config_R300x1x1.py
@@ -2,7 +2,7 @@
 fp16 = False
 
 train_batch_size = 4
-eval_batch_size = 6
+eval_batch_size = 4
 
 dist_backend = "xccl"
 

diff --git a/training/kunlunxin/glm-pytorch/config/config_R300x1x8.py b/training/kunlunxin/glm-pytorch/config/config_R300x1x8.py
@@ -1,8 +1,8 @@
 vendor = 'kunlunxin'
 fp16 = False
 
-train_batch_size = 4
-eval_batch_size = 6
+train_batch_size = 5
+eval_batch_size = 5
 
 dist_backend = "xccl"
 

diff --git a/training/kunlunxin/glm-pytorch/config/config_R300x2x8.py b/training/kunlunxin/glm-pytorch/config/config_R300x2x8.py
@@ -2,7 +2,7 @@
 fp16 = False
 
 train_batch_size = 4
-eval_batch_size = 6
+eval_batch_size = 4
 
 dist_backend = "xccl"
 

diff --git a/training/kunlunxin/glm-pytorch/config/environment_variables.sh b/training/kunlunxin/glm-pytorch/config/environment_variables.sh
@@ -7,6 +7,11 @@ export BKCL_TIMEOUT=1800
 # when using tree allreduce, the number of nodes must be a multiple of 2
 export BKCL_SOCKET_FORCE_TREE=1
 
+export XMLIR_D_XPU_L3_SIZE=32505856
+
+export BKCL_CCIX_RING=1
+export BKCL_FORCE_SYNC=1
+
 export ALLREDUCE_ASYNC=false
 export ALLREDUCE_FUSION=0
 

diff --git a/training/kunlunxin/glm-pytorch/config/requirements.txt b/training/kunlunxin/glm-pytorch/config/requirements.txt
@@ -1,3 +1,6 @@
 h5sparse
 boto3
 h5py
+numpy>=1.15.4
+sentencepiece>=0.1.8
+jieba