forked from FlagOpen/FlagPerf
add paddle Bert kunlunxin case (FlagOpen#172)
* add config
* update
* update
* update
* update
* fix
* add
* fix
* Update README.md

Co-authored-by: WZD09 <wangzhengdan@stu.pku.edu.cn>
Showing 8 changed files with 148 additions and 53 deletions.
training/benchmarks/bert/paddle/schedulers/linear_warmup_poly_scheduler.py (20 additions, 33 deletions)
```diff
@@ -1,48 +1,35 @@
-import torch
-from .base import LRScheduler
+import sys
+
+from paddle.optimizer.lr import LRScheduler
 
 
 class LinearWarmupPolyDecayScheduler(LRScheduler):
     """
     Applies a warm up period to the learning rate.
     """
 
     def __init__(self,
-                 optimizer,
-                 start_warmup_steps,
+                 startup_warmup_steps,
                  warmup_steps,
                  total_steps,
-                 end_learning_rate=0.0,
+                 base_lr,
+                 end_lr=0.0,
                  degree=1.0,
                  last_epoch=-1):
-        self.num_warmup_updates = warmup_steps
-        self.start_warmup_steps = start_warmup_steps
+        self.startup_warmup_steps = startup_warmup_steps
+        self.offset_step = int(startup_warmup_steps == 0)
+        self.warmup_steps = warmup_steps
         self.total_steps = total_steps
-        self.end_learning_rate = end_learning_rate
+        self.base_lr = base_lr
+        self.end_lr = end_lr
         self.degree = degree
         super(LinearWarmupPolyDecayScheduler,
-              self).__init__(optimizer, last_epoch)
-
-        if self.last_epoch <= 0:
-            self.last_epoch = 0
-
-    def step(self, epoch=None):
-        param_group = self.optimizer.param_groups[0]
-        if 'step' in param_group:
-            self.last_epoch = param_group['step'] + 1
-        else:
-            self.last_epoch += 1
-
-        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
-            param_group['lr'] = lr
+              self).__init__(learning_rate=base_lr, last_epoch=last_epoch)
 
     def get_lr(self):
-        mod_step = self.last_epoch - self.start_warmup_steps
-        if mod_step < self.num_warmup_updates:
-            progress = mod_step / self.num_warmup_updates
-            return [(base_lr * progress) for base_lr in self.base_lrs]
+        step = self.last_epoch + 1
+        mod_step = step - self.offset_step - self.startup_warmup_steps
+        if mod_step < self.warmup_steps:
+            p = mod_step / (self.warmup_steps + 1e-6)
+            lr = self.base_lr * p
         else:
-            progress = min(self.last_epoch / self.total_steps, 1.0)
-            return [(base_lr - self.end_learning_rate) *
-                    (1 - progress)**self.degree + self.end_learning_rate
-                    for base_lr in self.base_lrs]
+            p = min(1, (step - self.offset_step) / self.total_steps)
+            lr = (self.base_lr - self.end_lr) * (1 - p)**self.degree + self.end_lr
+        return lr
```
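For orientation, here is a minimal sketch of the schedule this class produces. The import path and the hyperparameters below are illustrative assumptions, not values taken from the benchmark configs:

```python
# Illustrative sketch only: the import path and hyperparameters are
# assumptions, not taken from the benchmark configs.
from linear_warmup_poly_scheduler import LinearWarmupPolyDecayScheduler

sched = LinearWarmupPolyDecayScheduler(startup_warmup_steps=0,
                                       warmup_steps=10,
                                       total_steps=100,
                                       base_lr=4e-4,
                                       end_lr=0.0,
                                       degree=1.0)

for _ in range(15):
    sched.step()           # paddle's LRScheduler.step() advances last_epoch
    print(sched.get_lr())  # linear ramp for 10 steps, then poly decay to end_lr
```

With degree=1.0, the decay branch reduces to a plain linear decay from base_lr to end_lr over total_steps.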
New file (73 additions): README for the Kunlunxin bert-paddle case
### Model Checkpoint Download
[Model checkpoint download](../../benchmarks/bert/README.md#模型checkpoint下载)

### Test Dataset Download
[Test dataset download](../../benchmarks/bert/README.md#测试数据集下载)

### Paddle Version Run Guide

● Bash environment variables:
```
export FLAGS_sync_nccl_allreduce=0
export FLAGS_fraction_of_gpu_memory_to_use=0.99
export FLAGS_call_stack_level=2
export FLAGS_use_fast_math=0
export FLAGS_enable_nvtx=1
export BKCL_CCIX_RING=1
export XPU_PADDLE_L3_SIZE=41943040
export XPU_PADDLE_FC_TRANS_A=1
export XPU_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 # indices of available XPUs
```

● Run script:

From within this directory:

```
python -u -m paddle.distributed.launch --xpus=${XPU_VISIBLE_DEVICES} run_pretraining.py \
    --data_dir data_path \
    --extern_config_dir config_path \
    --extern_config_file config_file.py
```

Example:
```
python -u -m paddle.distributed.launch --xpus=${XPU_VISIBLE_DEVICES} run_pretraining.py \
    --data_dir /bert-data/train \
    --extern_config_dir /home/FlagPerf/training/kunlunxin/bert-paddle/config \
    --extern_config_file config_R300x1x8.py
```

### Kunlunxin XPU Configuration and Run Information
#### Environment
- ##### Hardware
  - Machine model: Kunlunxin AI accelerator group R480-X8
  - Accelerator card model: Kunlunxin AI accelerator card R300
  - Multi-node network type and bandwidth: InfiniBand, 200 Gb/s

- ##### Software
  - OS version: Ubuntu 20.04
  - OS kernel version: 5.4.0-26-generic
  - Accelerator driver version: 4.0.25
  - Docker image and version: registry.baidubce.com/paddlepaddle/paddle:2.3.2
  - Training framework version: paddlepaddle+f6161d1
  - Dependency version: pytorch-1.8.1

### Run Results
| Training resources | Config file | Run time (s) | Target accuracy | Converged accuracy | Steps | Throughput (samples/s) |
| ------------------ | --------------- | ------------ | --------------- | ------------------ | ----- | ---------------------- |
| Single node, 8 cards | config_A100x1x8 | | 0.67 | 0.6709 | 11720 | |

### License

This project is licensed under the Apache 2.0 license.

Parts of this project are based on the MLCommons implementation at https://github.com/mlcommons/training_results_v1.0/tree/master/NVIDIA/benchmarks/.
New file (19 additions): case configuration
```python
dist_backend = "xccl"

target_mlm_accuracy = 0.67
gradient_accumulation_steps = 7
max_steps = 50000
start_warmup_step = 0
warmup_proportion = 0
warmup_steps = 0

learning_rate = 4e-4
weight_decay_rate = 0.01
opt_lamb_beta_1 = 0.9
opt_lamb_beta_2 = 0.999
train_batch_size = 8
eval_batch_size = train_batch_size
max_samples_termination = 4500000
cache_eval_data = False

seed = 9031
```
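FlagPerf selects a file like this through the --extern_config_dir and --extern_config_file flags shown in the README above. A minimal sketch of how such a flat Python config module could be loaded (an illustrative loader, not FlagPerf's actual mechanism; the path below is the one from the README example):

```python
# Illustrative loader for a flat Python config module such as this one.
# This is an assumption about the general pattern, not FlagPerf's real code.
import importlib.util


def load_extern_config(config_dir: str, config_file: str) -> dict:
    """Import config_dir/config_file and return its top-level settings."""
    spec = importlib.util.spec_from_file_location(
        "extern_config", f"{config_dir}/{config_file}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    # Keep plain settings (dist_backend, max_steps, ...), drop dunders.
    return {k: v for k, v in vars(module).items() if not k.startswith("_")}


cfg = load_extern_config(
    "/home/FlagPerf/training/kunlunxin/bert-paddle/config",
    "config_R300x1x8.py")
print(cfg["target_mlm_accuracy"], cfg["learning_rate"])
```

Note the effective global batch implied by these settings: with train_batch_size = 8, gradient_accumulation_steps = 7, and 8 cards, one optimizer step consumes 8 × 7 × 8 = 448 samples, assuming pure data parallelism.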
New file (9 additions): pip requirements
```
h5py==3.7.0
six==1.16.0
absl-py==1.2.0
paddle-bfloat==0.1.7
paddle2onnx==1.0.0
paddlefsl==1.1.0
paddlenlp==2.4.0
astor==0.8.1
torch==1.8.1
```
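Assuming this list is saved under the conventional name requirements.txt, the pins can be installed inside the container with `pip install -r requirements.txt` before launching the run script.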