From d23c9e92042331da9fa3cdadcd6b9699284e6698 Mon Sep 17 00:00:00 2001
From: Phodal Huang
Date: Wed, 20 Dec 2023 21:06:07 +0800
Subject: [PATCH] chore: update magic parameters for finetune
 :loop::loop::loop::loop::loop::loop:

---
 finetunes/deepseek/ds_config_zero2.json       | 50 ++++++++++++++++++
 finetunes/deepseek/ds_config_zero3.json       | 51 +++++++++++++++++++
 finetunes/deepseek/finetune.ipynb             | 10 ++--
 .../main/kotlin/cc/unitmesh/runner/Picker.kt  |  2 +-
 4 files changed, 107 insertions(+), 6 deletions(-)
 create mode 100644 finetunes/deepseek/ds_config_zero2.json
 create mode 100644 finetunes/deepseek/ds_config_zero3.json

diff --git a/finetunes/deepseek/ds_config_zero2.json b/finetunes/deepseek/ds_config_zero2.json
new file mode 100644
index 00000000..4387c5fc
--- /dev/null
+++ b/finetunes/deepseek/ds_config_zero2.json
@@ -0,0 +1,50 @@
+{
+    "fp16": {
+        "enabled": "auto",
+        "loss_scale": 0,
+        "loss_scale_window": 1000,
+        "initial_scale_power": 16,
+        "hysteresis": 2,
+        "min_loss_scale": 1
+    },
+
+    "optimizer": {
+        "type": "AdamW",
+        "params": {
+            "lr": "auto",
+            "betas": "auto",
+            "eps": "auto",
+            "weight_decay": "auto"
+        }
+    },
+
+    "scheduler": {
+        "type": "WarmupLR",
+        "params": {
+            "warmup_min_lr": "auto",
+            "warmup_max_lr": "auto",
+            "warmup_num_steps": "auto"
+        }
+    },
+
+    "zero_optimization": {
+        "stage": 2,
+        "offload_optimizer": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "allgather_partitions": true,
+        "allgather_bucket_size": 2e8,
+        "overlap_comm": true,
+        "reduce_scatter": true,
+        "reduce_bucket_size": 2e8,
+        "contiguous_gradients": true
+    },
+
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": "auto",
+    "steps_per_print": 2000,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
+}
\ No newline at end of file
diff --git a/finetunes/deepseek/ds_config_zero3.json b/finetunes/deepseek/ds_config_zero3.json
new file mode 100644
index 00000000..e056d205
--- /dev/null
+++ b/finetunes/deepseek/ds_config_zero3.json
@@ -0,0 +1,51 @@
+{
+    "bf16": {
+        "enabled": "auto"
+    },
+    "optimizer": {
+        "type": "AdamW",
+        "params": {
+            "lr": "auto",
+            "betas": "auto",
+            "eps": "auto",
+            "weight_decay": "auto"
+        }
+    },
+
+    "scheduler": {
+        "type": "WarmupLR",
+        "params": {
+            "warmup_min_lr": "auto",
+            "warmup_max_lr": "auto",
+            "warmup_num_steps": "auto"
+        }
+    },
+
+    "zero_optimization": {
+        "stage": 3,
+        "offload_optimizer": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "offload_param": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "overlap_comm": true,
+        "contiguous_gradients": true,
+        "sub_group_size": 1e9,
+        "reduce_bucket_size": "auto",
+        "stage3_prefetch_bucket_size": "auto",
+        "stage3_param_persistence_threshold": "auto",
+        "stage3_max_live_parameters": 1e9,
+        "stage3_max_reuse_distance": 1e9,
+        "stage3_gather_16bit_weights_on_model_save": true
+    },
+
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": "auto",
+    "steps_per_print": 20,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
+}
\ No newline at end of file
diff --git a/finetunes/deepseek/finetune.ipynb b/finetunes/deepseek/finetune.ipynb
index 93e146b7..4b47cb73 100644
--- a/finetunes/deepseek/finetune.ipynb
+++ b/finetunes/deepseek/finetune.ipynb
@@ -85,14 +85,14 @@
     "  --model_name_or_path $MODEL_PATH \\\n",
     "  --data_path $DATA_PATH \\\n",
     "  --output_dir $OUTPUT_PATH \\\n",
-    "  --num_train_epochs 3 \\\n",
-    "  --model_max_length 1024 \\\n",
-    "  --per_device_train_batch_size 2 \\\n",
+    "  --num_train_epochs 4 \\\n",
+    "  --model_max_length 512 \\\n",
+    "  --per_device_train_batch_size 16 \\\n",
     "  --per_device_eval_batch_size 1 \\\n",
     "  --gradient_accumulation_steps 4 \\\n",
     "  --evaluation_strategy \"no\" \\\n",
     "  --save_strategy \"steps\" \\\n",
-    "  --save_steps 100 \\\n",
+    "  --save_steps 50 \\\n",
     "  --save_total_limit 100 \\\n",
     "  --learning_rate 2e-5 \\\n",
     "  --warmup_steps 10 \\\n",
@@ -100,7 +100,7 @@
     "  --lr_scheduler_type \"cosine\" \\\n",
     "  --gradient_checkpointing True \\\n",
     "  --report_to \"tensorboard\" \\\n",
-    "  --deepspeed configs/ds_config_zero3.json \\\n",
+    "  --deepspeed configs/ds_config_zero2.json \\\n",
     "  --bf16 True"
    ]
   }
diff --git a/unit-cli/src/main/kotlin/cc/unitmesh/runner/Picker.kt b/unit-cli/src/main/kotlin/cc/unitmesh/runner/Picker.kt
index db289d26..d0fbebbf 100644
--- a/unit-cli/src/main/kotlin/cc/unitmesh/runner/Picker.kt
+++ b/unit-cli/src/main/kotlin/cc/unitmesh/runner/Picker.kt
@@ -17,7 +17,7 @@ private val logger = org.slf4j.LoggerFactory.getLogger(PickerCommand::class.java
 fun main(args: Array<String>) = PickerCommand().main(args)
 
 class PickerCommand : CliktCommand() {
-    val completionTypeSize by option(help = "Limit each CompletionType size").int().default(1000)
+    val completionTypeSize by option(help = "Limit each CompletionType size").int().default(500)
 
     override fun run() {
         val outputDir = File("datasets" + File.separator + "origin")
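
How the pieces above fit together (a sketch, not part of the patch): the
training command in finetune.ipynb is the single source of truth for the
hyperparameters. When it passes --deepspeed with one of the JSON files
added here, the Hugging Face Trainer resolves every "auto" field in that
file from its own command-line flags, which is why only the notebook
flags need to change. With the new values, the effective batch size per
optimizer step is 16 x 4 (gradient accumulation) = 64 sequences per GPU.
A minimal sketch of that wiring, assuming the standard transformers
DeepSpeed integration; output_dir and the config path are illustrative
placeholders, not paths from this repo:

    from transformers import TrainingArguments

    # Mirrors the notebook's flags after this patch; the "auto" entries
    # in ds_config_zero2.json inherit these values at Trainer startup.
    args = TrainingArguments(
        output_dir="output",               # the notebook uses $OUTPUT_PATH
        num_train_epochs=4,
        per_device_train_batch_size=16,
        gradient_accumulation_steps=4,
        learning_rate=2e-5,
        warmup_steps=10,
        lr_scheduler_type="cosine",
        save_strategy="steps",
        save_steps=50,
        save_total_limit=100,
        bf16=True,
        gradient_checkpointing=True,
        deepspeed="ds_config_zero2.json",  # "auto" fields resolve from the args above
    )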
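On the unit-cli side, the Picker.kt change halves the default cap on
samples collected per CompletionType from 1000 to 500. Since the Clikt
option declares no explicit name, the flag is derived from the property
name, so the old ceiling should still be selectable per run; the jar
name below is a hypothetical placeholder:

    java -jar unit-cli.jar --completion-type-size 1000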