diff --git a/templates/fine-tune-llm/README.md b/templates/fine-tune-llm/README.md
index 9ca35514d..5a3cc7611 100644
--- a/templates/fine-tune-llm/README.md
+++ b/templates/fine-tune-llm/README.md
@@ -7,6 +7,7 @@ This guide provides starter configurations if you would like to further customiz
 ### Supported base models
 - mistralai/Mistral-7B-Instruct-v0.1
+- mistralai/Mixtral-8x7b
 - meta-llama/Llama-2-7b-hf
 - meta-llama/Llama-2-7b-chat-hf
 - meta-llama/Llama-2-13b-hf
diff --git a/templates/fine-tune-llm/training_configs/full_param/llama-2-13b-4k-4xg5_12xlarge.yaml b/templates/fine-tune-llm/training_configs/full_param/llama-2-13b-4k-4xg5_12xlarge.yaml
index b46992e11..f060236db 100644
--- a/templates/fine-tune-llm/training_configs/full_param/llama-2-13b-4k-4xg5_12xlarge.yaml
+++ b/templates/fine-tune-llm/training_configs/full_param/llama-2-13b-4k-4xg5_12xlarge.yaml
@@ -8,6 +8,7 @@ train_batch_size_per_device: 8
 eval_batch_size_per_device: 8
 learning_rate: 5e-6
 num_checkpoints_to_keep: 1
+dataset_size_scaling_factor: 10000
 output_dir: /mnt/local_storage
 deepspeed:
   config_path: deepspeed_configs/zero_3_llama_2_13b.json
diff --git a/templates/fine-tune-llm/training_configs/full_param/llama-2-70b-4k-2xp4de_24xlarge.yaml b/templates/fine-tune-llm/training_configs/full_param/llama-2-70b-4k-2xp4de_24xlarge.yaml
new file mode 100644
index 000000000..68d6488df
--- /dev/null
+++ b/templates/fine-tune-llm/training_configs/full_param/llama-2-70b-4k-2xp4de_24xlarge.yaml
@@ -0,0 +1,17 @@
+model_id: meta-llama/Llama-2-70b-hf # <-- change this to the model you want to fine-tune
+train_path: s3://air-example-data/gsm8k/train.jsonl # <-- change this to the path to your training data
+valid_path: s3://air-example-data/gsm8k/test.jsonl # <-- change this to the path to your validation data. This is optional
+context_length: 4096 # <-- change this to the context length you want to use
+num_devices: 16 # <-- change this to total number of GPUs that you want to use
+num_epochs: 1 # <-- change this to the number of epochs that you want to train for
+train_batch_size_per_device: 1
+eval_batch_size_per_device: 1
+learning_rate: 5e-6
+num_checkpoints_to_keep: 1
+dataset_size_scaling_factor: 10000
+output_dir: /mnt/local_storage
+deepspeed:
+  config_path: deepspeed_configs/zero_3_llama_2_70b.json
+flash_attention_2: True
+worker_resources:
+  p4de.24xlarge: 1 # <-- this maps to job_compute_configs file's custom_resources so the appropriate nodes can scale up
\ No newline at end of file
diff --git a/templates/fine-tune-llm/training_configs/full_param/llama-2-70b-chat-4k-2xp4de_24xlarge.yaml b/templates/fine-tune-llm/training_configs/full_param/llama-2-70b-chat-4k-2xp4de_24xlarge.yaml
new file mode 100644
index 000000000..e45a2d023
--- /dev/null
+++ b/templates/fine-tune-llm/training_configs/full_param/llama-2-70b-chat-4k-2xp4de_24xlarge.yaml
@@ -0,0 +1,17 @@
+model_id: meta-llama/Llama-2-70b-chat-hf # <-- change this to the model you want to fine-tune
+train_path: s3://air-example-data/gsm8k/train.jsonl # <-- change this to the path to your training data
+valid_path: s3://air-example-data/gsm8k/test.jsonl # <-- change this to the path to your validation data. This is optional
+context_length: 4096 # <-- change this to the context length you want to use
+num_devices: 16 # <-- change this to total number of GPUs that you want to use
+num_epochs: 1 # <-- change this to the number of epochs that you want to train for
+train_batch_size_per_device: 1
+eval_batch_size_per_device: 1
+learning_rate: 5e-6
+num_checkpoints_to_keep: 1
+dataset_size_scaling_factor: 10000
+output_dir: /mnt/local_storage
+deepspeed:
+  config_path: deepspeed_configs/zero_3_llama_2_70b.json
+flash_attention_2: True
+worker_resources:
+  p4de.24xlarge: 1 # <-- this maps to job_compute_configs file's custom_resources so the appropriate nodes can scale up
\ No newline at end of file
diff --git a/templates/fine-tune-llm/training_configs/full_param/llama-2-7b-512-16xg5_4xlarge.yaml b/templates/fine-tune-llm/training_configs/full_param/llama-2-7b-512-16xg5_4xlarge.yaml
index 6ad7b6f38..fc184b47a 100644
--- a/templates/fine-tune-llm/training_configs/full_param/llama-2-7b-512-16xg5_4xlarge.yaml
+++ b/templates/fine-tune-llm/training_configs/full_param/llama-2-7b-512-16xg5_4xlarge.yaml
@@ -8,6 +8,7 @@ train_batch_size_per_device: 16
 eval_batch_size_per_device: 16
 learning_rate: 5e-6
 num_checkpoints_to_keep: 1
+dataset_size_scaling_factor: 10000
 output_dir: /mnt/local_storage
 deepspeed:
   config_path: deepspeed_configs/zero_3_llama_2_7b.json
diff --git a/templates/fine-tune-llm/training_configs/lora/llama-2-13b-4k-4xg5_12xlarge.yaml b/templates/fine-tune-llm/training_configs/lora/llama-2-13b-4k-4xg5_12xlarge.yaml
index e6a52eb61..495061970 100644
--- a/templates/fine-tune-llm/training_configs/lora/llama-2-13b-4k-4xg5_12xlarge.yaml
+++ b/templates/fine-tune-llm/training_configs/lora/llama-2-13b-4k-4xg5_12xlarge.yaml
@@ -8,6 +8,7 @@ train_batch_size_per_device: 8
 eval_batch_size_per_device: 8
 learning_rate: 5e-6
 num_checkpoints_to_keep: 1
+dataset_size_scaling_factor: 10000
 output_dir: /mnt/local_storage
 deepspeed:
   config_path: deepspeed_configs/zero_3_llama_2_13b.json
diff --git a/templates/fine-tune-llm/training_configs/lora/llama-2-7b-512-16xg5_4xlarge.yaml b/templates/fine-tune-llm/training_configs/lora/llama-2-7b-512-16xg5_4xlarge.yaml
index 862dd6916..6fa01b9c3 100644
--- a/templates/fine-tune-llm/training_configs/lora/llama-2-7b-512-16xg5_4xlarge.yaml
+++ b/templates/fine-tune-llm/training_configs/lora/llama-2-7b-512-16xg5_4xlarge.yaml
@@ -8,6 +8,7 @@ train_batch_size_per_device: 16
 eval_batch_size_per_device: 16
 learning_rate: 5e-6
 num_checkpoints_to_keep: 1
+dataset_size_scaling_factor: 10000
 output_dir: /mnt/local_storage
 deepspeed:
   config_path: deepspeed_configs/zero_3_llama_2_7b.json
diff --git a/templates/fine-tune-llm/training_configs/lora/mixtral-8X7b-512-1xp4de_24xlarge-viggo.yaml b/templates/fine-tune-llm/training_configs/lora/mixtral-8X7b-512-1xp4de_24xlarge.yaml
similarity index 95%
rename from templates/fine-tune-llm/training_configs/lora/mixtral-8X7b-512-1xp4de_24xlarge-viggo.yaml
rename to templates/fine-tune-llm/training_configs/lora/mixtral-8X7b-512-1xp4de_24xlarge.yaml
index 795d99c30..223b9f59f 100644
--- a/templates/fine-tune-llm/training_configs/lora/mixtral-8X7b-512-1xp4de_24xlarge-viggo.yaml
+++ b/templates/fine-tune-llm/training_configs/lora/mixtral-8X7b-512-1xp4de_24xlarge.yaml
@@ -7,6 +7,7 @@ train_batch_size_per_device: 4
 eval_batch_size_per_device: 4
 learning_rate: 1e-4
 num_checkpoints_to_keep: 1
+dataset_size_scaling_factor: 10000
 no_gradient_checkpoint: False
 output_dir: /mnt/local_storage
 deepspeed:
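One detail worth spelling out from the new 70B configs above: the key under `worker_resources` must match a custom resource advertised by the worker nodes in the job's compute config, otherwise no `p4de.24xlarge` node will scale up to take the training workers. A minimal sketch of the compute-config side is below; the surrounding field names (`worker_node_types`, `resources.custom_resources`, the `gpu-worker` name) follow the usual Anyscale compute-config layout but are illustrative assumptions here, not copied from this repo.

```yaml
# Hypothetical compute-config excerpt (field names are assumptions for illustration only).
worker_node_types:
  - name: gpu-worker
    instance_type: p4de.24xlarge
    min_workers: 0
    max_workers: 2
    resources:
      custom_resources:
        p4de.24xlarge: 1  # <-- must match the key under `worker_resources` in the training config
```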