Commit a0c4b4c

update gpt&baichuan&qwen ce name (#9697)
blacksheep-Aristotle authored Dec 26, 2024
1 parent 8c04a15 commit a0c4b4c
Showing 8 changed files with 11 additions and 11 deletions.
File 1 of 8:
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-param="model_item=gpt3-13b_pretrain "
+param="model_item=gpt-3-13b_pretrain "
 param+="run_mode=DP1_MP2_PP4_VPP5_Sharding4_Stage1 "
 param+="device_num=N4C32 "
 param+="global_batch_size=128 "

File 2 of 8:
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-param="model_item=baichuan-inc-baichaun-2-13b_pretrain "
-param+="run_mode=DP1_MP2_PP4_1F1B_Sharding8_Stage2 "
+param="model_item=baichuan-inc-baichuan-2-13b_pretrain "
+param+="run_mode=DP1_MP4_PP2_1F1B_Sharding4_Stage1 "
 param+="device_num=N4C32 "
 param+="global_batch_size=128 "
 param+="nnodes=4 "
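
A quick way to read these param files: run_mode encodes the hybrid-parallel layout (DP = data parallel, MP = tensor/model parallel, PP = pipeline parallel, Sharding = sharded data parallel; 1F1B and VPP name pipeline schedules, StageN the sharding stage), and device_num=N4C32 means 4 nodes with 32 cards in total. The DP, MP, PP and Sharding degrees have to multiply out to the card count, which is likely why the Baichuan run_mode changed: the old DP1_MP2_PP4_1F1B_Sharding8_Stage2 implies 1*2*4*8 = 64 cards, too many for N4C32, while the new DP1_MP4_PP2_1F1B_Sharding4_Stage1 gives 1*4*2*4 = 32. A minimal sanity-check sketch in Python (the helper and its parsing rules are illustrative assumptions, not code from this repo):

import re

def check_run_mode(run_mode: str, device_num: str) -> bool:
    """Check that the hybrid-parallel degrees in run_mode fill device_num exactly."""
    # device_num like "N4C32": 4 nodes, 32 cards in total.
    cards = int(re.match(r"N(\d+)C(\d+)", device_num).group(2))
    # Collect the degree of each DP/MP/PP/Sharding token; VPP, 1F1B and
    # StageN describe schedules, not extra process-grid dimensions.
    degrees = re.findall(r"(?:^|_)(?:DP|MP|PP|Sharding)(\d+)", run_mode)
    product = 1
    for d in degrees:
        product *= int(d)
    return product == cards

print(check_run_mode("DP1_MP2_PP4_VPP5_Sharding4_Stage1", "N4C32"))  # True: 1*2*4*4 == 32
print(check_run_mode("DP1_MP2_PP4_1F1B_Sharding8_Stage2", "N4C32"))  # False: 64 != 32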

File 3 of 8:
@@ -44,12 +44,12 @@
     "fused_linear": 1,
     "fused_linear_param_grad_add": 1,
     "use_fused_rope": true,
-    "use_fused_rms_norm": false,
+    "use_fused_rms_norm": true,
     "max_seq_length": 4096,
     "sequence_parallel": false,
     "sharding": "stage1",
     "sharding_parallel_config": "enable_stage1_tensor_fusion enable_stage1_overlap",
-    "tensor_parallel_config": "enable_mp_async_allreduce",
+    "tensor_parallel_config": "enable_mp_async_allreduce replace_with_parallel_cross_entropy",
     "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
     "pipeline_parallel_config": "enable_send_recv_overlap enable_split_backward"
 }
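
The flag added to tensor_parallel_config here, replace_with_parallel_cross_entropy, swaps the ordinary softmax cross-entropy loss for a vocabulary-parallel one, so each tensor-parallel rank computes the loss on its own shard of the logits instead of all-gathering the full vocabulary first. A conceptual numpy sketch of the underlying math (illustration only, not PaddleNLP's implementation; a real version replaces the Python-level max/sum with all-reduces across ranks):

import numpy as np

def parallel_cross_entropy(logit_shards, target, vocab_starts):
    """logit_shards: per-rank 1-D arrays splitting the vocab; target: token id."""
    # all-reduce(max): shared numerically-stable shift
    g_max = max(s.max() for s in logit_shards)
    # all-reduce(sum): softmax denominator over the full vocabulary
    g_sum = sum(np.exp(s - g_max).sum() for s in logit_shards)
    # only the rank owning the target token contributes the numerator
    tgt_logit = 0.0
    for start, s in zip(vocab_starts, logit_shards):
        if start <= target < start + len(s):
            tgt_logit = s[target - start] - g_max
    return np.log(g_sum) - tgt_logit  # equals -log softmax(logits)[target]

logits = np.random.randn(8)
shards = [logits[:4], logits[4:]]  # vocab split across 2 "ranks"
ref = -np.log(np.exp(logits - logits.max())[3] / np.exp(logits - logits.max()).sum())
print(np.isclose(parallel_cross_entropy(shards, 3, [0, 4]), ref))  # True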

File 4 of 8:
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-param="model_item=gpt3-13b_pretrain_dy2st "
+param="model_item=gpt-3-13b_pretrain_dy2st "
 param+="run_mode=DP1_MP2_PP4_1F1B_Sharding4_Stage1 "
 param+="device_num=N4C32 "
 param+="global_batch_size=128 "

File 5 of 8:
@@ -39,7 +39,7 @@
     "fuse_attention_qkv": 1,
     "fused_linear_param_grad_add": 1,
     "use_fused_rope": true,
-    "use_fused_rms_norm": false,
+    "use_fused_rms_norm": true,
     "recompute": 0,
     "recompute_use_reentrant": true,
     "recompute_granularity": "full",
@@ -52,7 +52,7 @@
     "attention_probs_dropout_prob": 0.1,
     "hidden_dropout_prob": 0.1,
     "sharding_parallel_config": "enable_stage1_tensor_fusion enable_stage1_overlap",
-    "tensor_parallel_config": "enable_mp_async_allreduce",
+    "tensor_parallel_config": "enable_mp_async_allreduce replace_with_parallel_cross_entropy",
     "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
     "pipeline_parallel_config": "enable_send_recv_overlap enable_split_backward"
 }
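
The other change in these configs, use_fused_rms_norm flipping from false to true, asks the framework to run RMSNorm as one fused kernel instead of separate mean/sqrt/divide/scale ops, saving kernel launches and memory traffic at max_seq_length 4096. For reference, the computation being fused (a plain numpy sketch, not the fused kernel itself):

import numpy as np

def rms_norm(x, weight, eps=1e-6):
    # Normalize by the root-mean-square over the hidden dimension, then scale.
    rms = np.sqrt((x * x).mean(axis=-1, keepdims=True) + eps)
    return x / rms * weight

x = np.random.randn(2, 4096, 8).astype(np.float32)  # (batch, seq, hidden) toy shapes
w = np.ones(8, dtype=np.float32)
print(rms_norm(x, w).shape)  # (2, 4096, 8)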

File 6 of 8:
@@ -53,7 +53,7 @@
     "pipeline_schedule_mode": "VPP",
     "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
     "sharding_parallel_config": "enable_stage1_overlap",
-    "tensor_parallel_config": "enable_mp_async_allreduce",
+    "tensor_parallel_config": "enable_mp_async_allreduce replace_with_parallel_cross_entropy",
     "max_seq_length": 4096,
     "to_static": true,
     "eliminate_transpose": 1,

File 7 of 8:
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-param="model_item=qwen-2-14b_pretrain_dy2st "
+param="model_item=qwen-14b_pretrain_dy2st "
 param+="run_mode=DP1_MP2_PP4_1F1B_Sharding4_Stage1 "
 param+="device_num=N4C32 "
 param+="global_batch_size=128 "

File 8 of 8:
@@ -50,6 +50,6 @@
     "auto_parallel_resume_form_hybrid_parallel": true,
     "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
     "sharding_parallel_config": "enable_stage1_overlap",
-    "tensor_parallel_config": "enable_mp_async_allreduce",
+    "tensor_parallel_config": "enable_mp_async_allreduce replace_with_parallel_cross_entropy",
     "pipeline_parallel_config": "enable_send_recv_overlap enable_split_backward"
 }
