diff --git a/training/benchmarks/cpm/README.md b/training/benchmarks/cpm/README.md index ecfd8d439..89eeca791 100644 --- a/training/benchmarks/cpm/README.md +++ b/training/benchmarks/cpm/README.md @@ -32,6 +32,6 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of | | Pytorch |Paddle|TensorFlow2| | ---- | ---- | ---- | ---- | | Nvidia GPU | ✅ |N/A |N/A| - +| 天数智芯 | ✅ | N/A |N/A| diff --git a/training/iluvatar/README.md b/training/iluvatar/README.md index 32f140abb..fde67c6cb 100644 --- a/training/iluvatar/README.md +++ b/training/iluvatar/README.md @@ -26,7 +26,7 @@ - 加速卡型号: Iluvatar BI-V100 32G - 软件 - OS kernel版本: - Linux 4.15.0-156-generic x86_64 + Linux 5.4.0-148-generic x86_64 - Docker 版本: 20.10.8 @@ -38,17 +38,17 @@ - 核心软件信息 - AI框架&版本 - torch: 1.10.2+corex.3.0.0 + torch: 1.13.1+corex.3.1.0 - 其它软件版本 cuda: 10.2 - corex: 3.0.0 + corex: 3.1.0 - torchtext: 0.11.2+corex.3.0.0 + torchtext: 0.14.1+corex.3.1.0 - apex: 0.1+corex.3.0.0 + apex: 0.1+corex.3.1.0 diff --git a/training/iluvatar/cpm-pytorch/README.md b/training/iluvatar/cpm-pytorch/README.md index 63365ee6d..440784204 100644 --- a/training/iluvatar/cpm-pytorch/README.md +++ b/training/iluvatar/cpm-pytorch/README.md @@ -1,25 +1,10 @@ -### 模型信息 -- 模型介绍 ->中文预训练语言模型(CPM)是基于transformers 的自回归语言模型,其训练使用了100G中文数据,最大版本包含26亿参数,支持文本分类、文本生成。 ->获取CPM论文了解更多 ->[CPM: A Large-scale Generative Chinese Pre-trained Language Model](https://arxiv.org/abs/2012.00413) - -- 模型代码来源 - -| repo | commmit_id | date | -| ------- | ----------- |----- | -| [CPM-1-Finetune](https://github.com/TsinghuaAI/CPM-1-Finetune) | c0d892185912b28f8efeaeb55905f3f4fb227e46|2021-10-17 21:53:00| - ### 模型Checkpoint下载 -> [下载页](https://model.baai.ac.cn/model-detail/100017) -文件及版本tab页下,pytorch_model.bin. -参数数:2.6B +参见[模型Checkpoint下载](../../benchmarks/cpm/README.md#模型checkpoint) + ### 测试数据集下载 -> Dataset : https://drive.google.com/drive/folders/1gL01xbFBcrgP0TmgOhJ_uplkeG-BCwvM +参见[测试数据集下载](../../benchmarks/cpm/README.md#测试数据集下载地址) -- 预处理 -> 无需预处理 ### 天数智芯 BI-V100 GPU配置与运行信息参考 #### 环境配置 @@ -28,24 +13,37 @@ - ##### 软件环境 - OS版本:Ubuntu 20.04 - - OS kernel版本: 4.15.0-156-generic x86_64 - - 加速卡驱动版本:3.0.0 + - OS kernel版本: 5.4.0-148-generic x86_64 + - 加速卡驱动版本:3.1.0 - Docker 版本:20.10.8 - - 训练框架版本:torch-1.10.2+corex.3.0.0 + - 训练框架版本:torch-1.13.1+corex.3.1.0 - 依赖软件版本:无 - -### 运行情况 -| 训练资源 | 配置文件 | 运行时长(s) | 目标精度 | 收敛精度 | Steps数 | 性能(samples/s) | -| -------- | ------------------ | ---------- | ------- | ------- | ------- | --------------- | -| 单机1卡 | config_BI-V100x1x1 | 6200.51 | 0.8 | 0.8047 | 4375 |23.89 | -| 单机2卡 | config_BI-V100x1x2 | 5291.47 | 0.8 | 0.8015 | 3756 |47.52 | -| 单机4卡 | config_BI-V100x1x4 | 5035.74 | 0.8 | 0.8013 | 3454 |91.27 | -| 单机8卡 | config_BI-V100x1x8 | 5154.91 | 0.92 | 0.9203 | 3476 |178.51 | -| 两机8卡 | config_BI-V100x2x8 | pending | 0.92 | pending | pending |pending | - -### 许可证 - -本项目基于Apache 2.0 license。 - -本项目部分代码基于MLCommons https://github.com/mlcommons/training_results_v1.0/tree/master/NVIDIA 实现。 \ No newline at end of file +#### 运行情况 + +* 通用指标 + +| 指标名称 | 指标值 | 特殊说明 | +| -------------- | ------------------------------ | ------------------------------------------- | +| 任务类别 | 文本分类、文本生成 | | +| 模型 | cpm | | +| 数据集 | CPM-Finetune-data | | +| 数据精度 | precision,见“性能指标” | 可选fp32/amp/fp16 | +| 超参修改 | fix_hp,见“性能指标” | 跑满硬件设备评测吞吐量所需特殊超参 | +| 硬件设备简称 | BI-V100 | | +| 硬件存储使用 | mem(actual/total),见“性能指标” | 通常称为“显存”,单位为GiB | +| 端到端时间 | e2e_time,见“性能指标” | 总时间+Perf初始化等时间 | +| 总吞吐量 | p_whole,见“性能指标” | 实际训练样本数除以总时间(performance_whole) | +| 训练吞吐量 | p_train,见“性能指标” | 不包含每个epoch末尾的评估部分耗时 | +| **计算吞吐量** | **p_core,见“性能指标”** | 不包含数据IO部分的耗时(p3>p2>p1) | +| 训练结果 | acc,见“性能指标” | 分类准确率(mlm_accuracy) | +| 额外修改项 | 无 | | + +* 性能指标 + +| 配置 | precision | fix_hp | e2e_time | p_whole | p_train | p_core | acc | mem | +| ------------------- | --------- | ---------------- | -------- | ------- | ------- | ------ | ----- | --------- | +| BI-V100单机8卡(1x8) | fp16 | / | | | | | 0.92 | 10.7/32.0 | +| BI-V100单机8卡(1x8) | fp16 | bs=128,lr=0.0005 | | | | | 0.92 | 20.3/32.0| +| BI-V100单机单卡(1x1) | fp16 | bs=192,lr=0.0005 | | | | | | 29.1/32.0 | +| BI-V100两机8卡(2x8) | fp16 | bs=128,lr=0.0005 | | | | | | 20.4/32.0 | diff --git a/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x1.py b/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x1.py index 499d88200..025de8239 100755 --- a/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x1.py +++ b/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x1.py @@ -8,7 +8,7 @@ train_batch_size = 32 eval_batch_size = train_batch_size -max_steps = 4000000 +max_steps = 60000 max_samples_termination = 439126000 warmup = 0.2 @@ -19,3 +19,4 @@ eps: float = 1e-08 seed = 23333 +training_event = None diff --git a/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x2.py b/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x2.py deleted file mode 100644 index 499d88200..000000000 --- a/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x2.py +++ /dev/null @@ -1,21 +0,0 @@ -from config_common import * - -fp16 = True -dist_backend = "nccl" -target_embedding_average = 0.8 - -gradient_accumulation_steps = 1 - -train_batch_size = 32 -eval_batch_size = train_batch_size -max_steps = 4000000 -max_samples_termination = 439126000 - -warmup = 0.2 -learning_rate = 0.0005 - -beta_1: float = 0.9 -beta_2: float = 0.99 -eps: float = 1e-08 - -seed = 23333 diff --git a/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x4.py b/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x4.py deleted file mode 100755 index 499d88200..000000000 --- a/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x4.py +++ /dev/null @@ -1,21 +0,0 @@ -from config_common import * - -fp16 = True -dist_backend = "nccl" -target_embedding_average = 0.8 - -gradient_accumulation_steps = 1 - -train_batch_size = 32 -eval_batch_size = train_batch_size -max_steps = 4000000 -max_samples_termination = 439126000 - -warmup = 0.2 -learning_rate = 0.0005 - -beta_1: float = 0.9 -beta_2: float = 0.99 -eps: float = 1e-08 - -seed = 23333 diff --git a/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x8.py b/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x8.py index 486839dbf..8fc5feaa1 100755 --- a/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x8.py +++ b/training/iluvatar/cpm-pytorch/config/config_BI-V100x1x8.py @@ -19,3 +19,4 @@ eps: float = 1e-08 seed = 23333 +training_event = None \ No newline at end of file diff --git a/training/iluvatar/cpm-pytorch/config/config_BI-V100x2x8.py b/training/iluvatar/cpm-pytorch/config/config_BI-V100x2x8.py index 27178bffa..1afc736d1 100755 --- a/training/iluvatar/cpm-pytorch/config/config_BI-V100x2x8.py +++ b/training/iluvatar/cpm-pytorch/config/config_BI-V100x2x8.py @@ -1,14 +1,15 @@ from config_common import * fp16 = True +ddp_type = "apex" dist_backend = "nccl" target_embedding_average = 0.92 gradient_accumulation_steps = 1 -train_batch_size = 32 +train_batch_size = 128 eval_batch_size = train_batch_size -max_steps = 4000 +max_steps = 2000 warmup = 0.2 learning_rate = 0.0005 @@ -18,3 +19,5 @@ eps: float = 1e-08 seed = 23333 +max_samples_termination = 43912600 +training_event = None