From 04658ed4df10480ea92457c999bfd75f22727073 Mon Sep 17 00:00:00 2001
From: Amog Kamsetty
Date: Fri, 23 Feb 2024 16:44:01 -0800
Subject: [PATCH 1/4] mistral full param

Signed-off-by: Amog Kamsetty
---
 .../deepspeed_configs/zero_3_mistral_7b.json  | 35 +++++++++++++++++++
 .../mistral-7b-4k-4xg5_12xlarge.yaml          | 18 ++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
 create mode 100644 templates/fine-tune-llm/training_configs/full_param/mistral-7b-4k-4xg5_12xlarge.yaml

diff --git a/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json b/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
new file mode 100644
index 000000000..9cdb2a021
--- /dev/null
+++ b/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
@@ -0,0 +1,35 @@
+{
+    "fp16": {
+        "enabled": "auto"
+    },
+    "bf16": {
+        "enabled": true
+    },
+    "zero_optimization": {
+        "stage": 3,
+        "offload_optimizer": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "offload_param": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "overlap_comm": true,
+        "contiguous_gradients": true,
+        "sub_group_size": 1e9,
+        "reduce_bucket_size": 5e8,
+        "stage3_prefetch_bucket_size": 5e8,
+        "stage3_param_persistence_threshold": 1e6,
+        "stage3_max_live_parameters": 1e9,
+        "stage3_max_reuse_distance": 1e9,
+        "stage3_gather_16bit_weights_on_model_save": true,
+        "round_robin_gradients": true
+    },
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": "auto",
+    "steps_per_print": 10,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
+}
\ No newline at end of file
diff --git a/templates/fine-tune-llm/training_configs/full_param/mistral-7b-4k-4xg5_12xlarge.yaml b/templates/fine-tune-llm/training_configs/full_param/mistral-7b-4k-4xg5_12xlarge.yaml
new file mode 100644
index 000000000..fb0dc913f
--- /dev/null
+++ b/templates/fine-tune-llm/training_configs/full_param/mistral-7b-4k-4xg5_12xlarge.yaml
@@ -0,0 +1,18 @@
+model_id: mistralai/Mistral-7B-Instruct-v0.1 # <-- change this to the model you want to fine-tune
+train_path: s3://air-example-data/viggo/train.jsonl
+valid_path: s3://air-example-data/viggo/valid.jsonl
+context_length: 4096 # <-- change this to the context length you want to use
+num_devices: 16 # <-- change this to total number of GPUs that you want to use
+num_epochs: 1 # <-- change this to the number of epochs that you want to train for
+train_batch_size_per_device: 2
+eval_batch_size_per_device: 2
+learning_rate: 1e-4
+num_checkpoints_to_keep: 1
+no_gradient_checkpoint: False
+dataset_size_scaling_factor: 10000
+output_dir: /mnt/local_storage
+deepspeed:
+  config_path: deepspeed_configs/zero_3_mistral_7b.json
+flash_attention_2: True
+worker_resources:
+  g5.12xlarge: 1 # <-- this maps to job_compute_configs file's custom_resources so the appropriate nodes can scale up
\ No newline at end of file

From 6da1afce427d527d858ca66237c38c92e0b1692e Mon Sep 17 00:00:00 2001
From: Amog Kamsetty
Date: Fri, 23 Feb 2024 17:10:30 -0800
Subject: [PATCH 2/4] lint

Signed-off-by: Amog Kamsetty
---
 .../full_param/mistral-7b-4k-4xg5_12xlarge.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/fine-tune-llm/training_configs/full_param/mistral-7b-4k-4xg5_12xlarge.yaml b/templates/fine-tune-llm/training_configs/full_param/mistral-7b-4k-4xg5_12xlarge.yaml
index fb0dc913f..52228990d 100644
--- a/templates/fine-tune-llm/training_configs/full_param/mistral-7b-4k-4xg5_12xlarge.yaml
+++ b/templates/fine-tune-llm/training_configs/full_param/mistral-7b-4k-4xg5_12xlarge.yaml
@@ -15,4 +15,4 @@ deepspeed:
   config_path: deepspeed_configs/zero_3_mistral_7b.json
 flash_attention_2: True
 worker_resources:
-  g5.12xlarge: 1 # <-- this maps to job_compute_configs file's custom_resources so the appropriate nodes can scale up
\ No newline at end of file
+  g5.12xlarge: 1 # <-- this maps to job_compute_configs file's custom_resources so the appropriate nodes can scale up

From a577741850267e3492f55028b4e179f077730e85 Mon Sep 17 00:00:00 2001
From: Amog Kamsetty
Date: Fri, 23 Feb 2024 17:17:10 -0800
Subject: [PATCH 3/4] lint

Signed-off-by: Amog Kamsetty
---
 .../fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json b/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
index 9cdb2a021..07877dd5d 100644
--- a/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
+++ b/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
@@ -32,4 +32,4 @@
     "train_batch_size": "auto",
     "train_micro_batch_size_per_gpu": "auto",
     "wall_clock_breakdown": false
-}
\ No newline at end of file
+}

From e33f6db7a0e81e32a460e6db30f01324f0745c8 Mon Sep 17 00:00:00 2001
From: Amog Kamsetty
Date: Fri, 23 Feb 2024 17:21:11 -0800
Subject: [PATCH 4/4] lint

Signed-off-by: Amog Kamsetty
---
 .../fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json b/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
index 07877dd5d..2ebec1f89 100644
--- a/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
+++ b/templates/fine-tune-llm/deepspeed_configs/zero_3_mistral_7b.json
@@ -1,4 +1,4 @@
-{ 
+{
     "fp16": {
         "enabled": "auto"
    },
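
A note on the configuration introduced above. The "auto" values in
zero_3_mistral_7b.json (fp16.enabled, gradient_accumulation_steps,
gradient_clipping, train_batch_size, train_micro_batch_size_per_gpu) are
placeholders that the launching framework is expected to resolve at run
time; Hugging Face Transformers' DeepSpeed integration, for instance,
fills them in from TrainingArguments. In the training YAML, num_devices: 16
corresponds to four g5.12xlarge nodes with 4 NVIDIA A10G GPUs each, which is
what the "4xg5_12xlarge" in the file name refers to.

Below is a minimal sketch of how those "auto" fields are typically resolved,
assuming a Hugging Face Trainer-style entry point; this template's actual
trainer may work differently:

    # Hypothetical sketch, not this template's entry point. When
    # TrainingArguments.deepspeed points at the JSON above, the HF
    # integration substitutes the "auto" fields from these arguments;
    # train_batch_size is then derived as
    # micro batch size * gradient accumulation steps * world size.
    from transformers import TrainingArguments

    args = TrainingArguments(
        output_dir="/mnt/local_storage",   # mirrors output_dir in the YAML
        per_device_train_batch_size=2,     # -> train_micro_batch_size_per_gpu
        gradient_accumulation_steps=1,     # -> gradient_accumulation_steps
        max_grad_norm=1.0,                 # -> gradient_clipping
        bf16=True,                         # consistent with bf16.enabled: true
        deepspeed="deepspeed_configs/zero_3_mistral_7b.json",
    )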