@@ -22,18 +22,18 @@ tokenizer:
 
 # Dataset
 dataset:
-  packed: False  # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
+  packed: False  # True increases speed
 seed: null
 shuffle: True
 
 # Model Arguments
 model:
   _component_: torchtune.models.gemma.lora_gemma_2b
-  lora_attn_modules: ['q_proj', 'k_proj', 'v_proj']
+  lora_attn_modules: ['q_proj', 'v_proj', 'output_proj']
   apply_lora_to_mlp: True
-  lora_rank: 64
-  lora_alpha: 128
+  lora_rank: 64  # higher increases accuracy and memory
+  lora_alpha: 128  # usually alpha=2*rank
   lora_dropout: 0.0
 
 checkpointer:
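Note on the `alpha=2*rank` comment above: in standard LoRA the adapter update is scaled by alpha / rank, so keeping alpha at twice the rank holds the adapter contribution at a constant factor of 2 as rank changes. A minimal sketch of that scaling, using a generic LoRA layer for illustration (not torchtune's own LoRALinear):

import torch
from torch import nn

class LoRALinear(nn.Module):
    # Sketch: y = W x + (alpha / rank) * B(A(x)), with W frozen.
    def __init__(self, in_dim: int, out_dim: int, rank: int = 64, alpha: float = 128.0):
        super().__init__()
        self.base = nn.Linear(in_dim, out_dim, bias=False)
        self.base.weight.requires_grad_(False)              # frozen pretrained weight
        self.lora_a = nn.Linear(in_dim, rank, bias=False)   # down-projection A
        self.lora_b = nn.Linear(rank, out_dim, bias=False)  # up-projection B
        self.scaling = alpha / rank                         # alpha=2*rank -> scaling = 2

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base(x) + self.scaling * self.lora_b(self.lora_a(x))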
@@ -66,14 +66,14 @@ loss:
 batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
-gradient_accumulation_steps: 1
-compile: False
+gradient_accumulation_steps: 1  # Use to increase virtual batch size
+compile: False  # pytorch compile, set to true for better perf/memory
 
 # Training env
 device: cuda
 
 # Memory management
-enable_activation_checkpointing: True
+enable_activation_checkpointing: True  # True reduces memory
 enable_activation_offloading: False  # True reduces memory
 
 # Reduced precision
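For the `gradient_accumulation_steps` comment: the virtual batch size is batch_size * gradient_accumulation_steps, because the optimizer only steps after that many micro-batch backward passes. A hedged sketch of the standard pattern on toy data (not the recipe's actual training loop):

import torch
from torch import nn, optim

model = nn.Linear(8, 1)                        # toy stand-in for the fine-tuned model
opt = optim.AdamW(model.parameters(), lr=2e-5)
data = [(torch.randn(4, 8), torch.randn(4, 1)) for _ in range(8)]  # batch_size = 4

accum_steps = 2  # gradient_accumulation_steps; virtual batch = 4 * 2 = 8
for step, (x, y) in enumerate(data):
    loss = nn.functional.mse_loss(model(x), y) / accum_steps  # average over micro-batches
    loss.backward()                                           # gradients accumulate in .grad
    if (step + 1) % accum_steps == 0:
        opt.step()       # one optimizer step per accumulation window
        opt.zero_grad()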
@@ -86,3 +86,28 @@ metric_logger:
 output_dir: /tmp/alpaca-gemma-lora
 log_every_n_steps: 1
 log_peak_memory_stats: True
+
+# Profiler (disabled)
+profiler:
+  _component_: torchtune.training.setup_torch_profiler
+  enabled: False
+
+  # Output directory of trace artifacts
+  output_dir: ${output_dir}/profiling_outputs
+
+  # `torch.profiler.ProfilerActivity` types to trace
+  cpu: True
+  cuda: True
+
+  # trace options passed to `torch.profiler.profile`
+  profile_memory: False
+  with_stack: False
+  record_shapes: True
+  with_flops: False
+
+  # `torch.profiler.schedule` options:
+  # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
+  wait_steps: 5
+  warmup_steps: 3
+  active_steps: 2
+  num_cycles: 1
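The schedule comment in the new profiler block maps the config keys onto torch.profiler.schedule arguments: with the values above, each cycle waits 5 steps, warms up for 3, records 2 active steps, and runs for 1 cycle. A hedged sketch of the equivalent direct torch.profiler setup (torchtune's setup_torch_profiler wraps this; the path and dummy workload are illustrative):

import torch
from torch.profiler import ProfilerActivity, profile, schedule, tensorboard_trace_handler

prof = profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],  # cpu: True, cuda: True
    schedule=schedule(wait=5, warmup=3, active=2, repeat=1),    # wait/warmup/active/num_cycles
    on_trace_ready=tensorboard_trace_handler(
        "/tmp/alpaca-gemma-lora/profiling_outputs"),            # ${output_dir}/profiling_outputs
    record_shapes=True,
    profile_memory=False,
    with_stack=False,
    with_flops=False,
)

with prof:
    for _ in range(10):  # 5 wait + 3 warmup + 2 active = 10 steps for one cycle
        torch.randn(64, 64) @ torch.randn(64, 64)  # stand-in for a training step
        prof.step()  # advance the profiler schedule once per iteration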