
Commit 2c948c6

Expose packed: False, set log_peak_memory_stats: True, set compile: False (#1872)

Authored by krammnic
Co-authored-by: krammnic <krammnic@krammnic.krammnic.com>

1 parent 33b8143 · commit 2c948c6
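The diffs below apply the same pattern to each recipe config: packed is exposed under dataset (defaulting to False), compile: False is added next to the training-loop settings where applicable, and log_peak_memory_stats is switched to True. As an illustrative composite only (the exact neighbouring keys and ordering vary per file, as the per-file diffs show), the affected sections end up looking roughly like this:

# Dataset
dataset:
  packed: False # Set to true for great speed ups
  _component_: torchtune.datasets.alpaca_dataset

# Training
gradient_accumulation_steps: 1
compile: False

# Logging
log_every_n_steps: 1
log_peak_memory_stats: True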

93 files changed: +275 −117 lines

recipes/configs/code_llama2/7B_full_low_memory.yaml (3 additions, 1 deletion)

@@ -45,7 +45,9 @@ resume_from_checkpoint: False
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
+
 seed: null
 shuffle: True
 

@@ -75,4 +77,4 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/CodeLlama-7b-hf/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

recipes/configs/code_llama2/7B_lora_single_device.yaml (3 additions, 1 deletion)

@@ -49,7 +49,9 @@ save_adapter_weights_only: False
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_cleaned_dataset
+
 seed: null
 shuffle: True
 

@@ -84,7 +86,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/CodeLlama-7b-hf/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Showcase the usage of PyTorch profiler
 # Set enabled to False as it's only needed for debugging training

recipes/configs/code_llama2/7B_qlora_single_device.yaml (2 additions, 1 deletion)

@@ -49,6 +49,7 @@ save_adapter_weights_only: False
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_cleaned_dataset
 seed: null
 shuffle: True

@@ -84,7 +85,7 @@ metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: /tmp/CodeLlama-7b-hf/logs
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training

recipes/configs/dev/8B_full_experimental.yaml (3 additions, 1 deletion)

@@ -26,6 +26,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True

@@ -57,7 +58,7 @@ loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
-
+compile: False
 
 # Training env
 device: cuda

@@ -78,3 +79,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-llama3-finetune
 log_every_n_steps: null
+log_peak_memory_stats: True

recipes/configs/gemma/2B_full.yaml (3 additions, 1 deletion)

@@ -23,6 +23,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True

@@ -54,6 +55,7 @@ loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False
 
 # Training env
 device: cuda

@@ -70,4 +72,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma-finetune
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

recipes/configs/gemma/2B_lora.yaml (3 additions, 1 deletion)

@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True

@@ -66,6 +67,7 @@ batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False
 
 # Training env
 device: cuda

@@ -82,4 +84,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

recipes/configs/gemma/2B_lora_single_device.yaml (2 additions, 1 deletion)

@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True

@@ -83,7 +84,7 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training

recipes/configs/gemma/2B_qlora_single_device.yaml (2 additions, 1 deletion)

@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True

@@ -83,7 +84,7 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training

recipes/configs/gemma/7B_full.yaml (3 additions, 1 deletion)

@@ -23,6 +23,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True

@@ -56,6 +57,7 @@ loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False
 
 # Training env
 device: cuda

@@ -72,4 +74,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma-finetune
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True

recipes/configs/gemma/7B_lora.yaml (3 additions, 1 deletion)

@@ -23,6 +23,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True

@@ -68,6 +69,7 @@ batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False
 
 # Training env
 device: cuda

@@ -84,4 +86,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
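With these defaults now spelled out in every config, opting into the faster path is a matter of flipping the exposed values in a user copy of the config. A minimal sketch, assuming the standard torchtune config layout shown in the diffs above (note that packing generally also requires tokenizer.max_seq_len to be set, and compile uses torch.compile, so the first steps are slower while compilation warms up):

dataset:
  _component_: torchtune.datasets.alpaca_dataset
  packed: True   # pack samples for higher throughput; needs tokenizer.max_seq_len
compile: True    # torch.compile the model/loss for speed after warm-up
log_peak_memory_stats: True

The same values can also be overridden at launch time with the tune CLI's key=value syntax (e.g. dataset.packed=True compile=True) rather than editing the file.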
