
docs(finetune): update for prompt
phodal committed Dec 24, 2023
1 parent b1219a0 commit fef5b88
Showing 2 changed files with 42 additions and 5 deletions.
37 changes: 37 additions & 0 deletions finetunes/deepseek/README.md
@@ -21,6 +21,43 @@
os.environ["NCCL_P2P_DISABLE"] = "1"
os.environ["NCCL_IB_DISABLE"] = "1"
```

### 4090

```bash
DATA_PATH="/openbayes/home/summary.jsonl"
OUTPUT_PATH="/openbayes/home/output"

# NotImplementedError: Using RTX 3090 or 4000 series doesn't support faster communication broadband via P2P or IB. Please
# set `NCCL_P2P_DISABLE="1"` and `NCCL_IB_DISABLE="1"` or use `accelerate launch` which will do this automatically.
#
# !NCCL_P2P_DISABLE=1
# !NCCL_IB_DISABLE=1

MODEL_PATH="/openbayes/home/deepseek-coder-6.7b-instruct"

cd DeepSeek-Coder/finetune && deepspeed finetune_deepseekcoder.py \
--model_name_or_path $MODEL_PATH \
--data_path $DATA_PATH \
--output_dir $OUTPUT_PATH \
--num_train_epochs 1 \
--model_max_length 512 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 1 \
--gradient_accumulation_steps 4 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 300 \
--save_total_limit 1 \
--learning_rate 4e-5 \
--warmup_steps 10 \
--logging_steps 1 \
--lr_scheduler_type "cosine" \
--gradient_checkpointing True \
--report_to "tensorboard" \
--deepspeed configs/ds_config_zero3.json \
--bf16 True
```
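The NCCL lines above are left as commented-out notebook (`!`) commands. A minimal sketch of the same setup in a plain shell session (assuming you launch `deepspeed` directly rather than via `accelerate launch`, which sets these automatically) might look like:

```shell
# Work around the RTX 3090/4000-series NCCL limitation noted above:
# disable the P2P and InfiniBand transports before launching deepspeed.
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1

# Confirm the variables are visible to child processes such as deepspeed.
echo "NCCL_P2P_DISABLE=$NCCL_P2P_DISABLE NCCL_IB_DISABLE=$NCCL_IB_DISABLE"
```

These exports must happen in the same shell (or parent process) that runs the `deepspeed` command, so NCCL sees them when it initializes.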

## Server

1. Install requirements
10 changes: 5 additions & 5 deletions finetunes/deepseek/finetune.ipynb
@@ -85,22 +85,22 @@
 " --model_name_or_path $MODEL_PATH \\\n",
 " --data_path $DATA_PATH \\\n",
 " --output_dir $OUTPUT_PATH \\\n",
-" --num_train_epochs 4 \\\n",
+" --num_train_epochs 1 \\\n",
 " --model_max_length 512 \\\n",
-" --per_device_train_batch_size 16 \\\n",
+" --per_device_train_batch_size 4 \\\n",
 " --per_device_eval_batch_size 1 \\\n",
 " --gradient_accumulation_steps 4 \\\n",
 " --evaluation_strategy \"no\" \\\n",
 " --save_strategy \"steps\" \\\n",
-" --save_steps 50 \\\n",
+" --save_steps 300 \\\n",
 " --save_total_limit 100 \\\n",
-" --learning_rate 2e-5 \\\n",
+" --learning_rate 4e-5 \\\n",
 " --warmup_steps 10 \\\n",
 " --logging_steps 1 \\\n",
 " --lr_scheduler_type \"cosine\" \\\n",
 " --gradient_checkpointing True \\\n",
 " --report_to \"tensorboard\" \\\n",
-" --deepspeed configs/ds_config_zero2.json \\\n",
+" --deepspeed configs/ds_config_zero3.json \\\n",
 " --bf16 True"
]
}
