{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.229820232640113, "global_step": 750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 0.0002, "loss": 1.0176, "step": 20 }, { "epoch": 0.11, "eval_loss": 0.7669806480407715, "eval_runtime": 119.3368, "eval_samples_per_second": 16.759, "eval_steps_per_second": 0.268, "step": 20 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 0.8089, "step": 40 }, { "epoch": 0.23, "eval_loss": 0.7102532386779785, "eval_runtime": 119.0614, "eval_samples_per_second": 16.798, "eval_steps_per_second": 0.269, "step": 40 }, { "epoch": 0.34, "learning_rate": 0.0002, "loss": 0.7639, "step": 60 }, { "epoch": 0.34, "eval_loss": 0.6923925876617432, "eval_runtime": 119.0614, "eval_samples_per_second": 16.798, "eval_steps_per_second": 0.269, "step": 60 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 0.7485, "step": 80 }, { "epoch": 0.45, "eval_loss": 0.6790987253189087, "eval_runtime": 119.0299, "eval_samples_per_second": 16.803, "eval_steps_per_second": 0.269, "step": 80 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 0.7492, "step": 100 }, { "epoch": 0.56, "eval_loss": 0.6719343662261963, "eval_runtime": 119.0235, "eval_samples_per_second": 16.803, "eval_steps_per_second": 0.269, "step": 100 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 0.7258, "step": 120 }, { "epoch": 0.68, "eval_loss": 0.6718525290489197, "eval_runtime": 118.993, "eval_samples_per_second": 16.808, "eval_steps_per_second": 0.269, "step": 120 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 0.7119, "step": 140 }, { "epoch": 0.79, "eval_loss": 0.6600658297538757, "eval_runtime": 119.0147, "eval_samples_per_second": 16.805, "eval_steps_per_second": 0.269, "step": 140 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 0.6969, "step": 160 }, { "epoch": 0.9, "eval_loss": 0.6557925343513489, "eval_runtime": 119.0117, "eval_samples_per_second": 16.805, "eval_steps_per_second": 0.269, "step": 160 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 0.6787, "step": 180 }, { "epoch": 1.02, "eval_loss": 0.6543287038803101, "eval_runtime": 119.1041, "eval_samples_per_second": 16.792, "eval_steps_per_second": 0.269, "step": 180 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 0.4496, "step": 200 }, { "epoch": 1.13, "eval_loss": 0.6743494868278503, "eval_runtime": 119.0621, "eval_samples_per_second": 16.798, "eval_steps_per_second": 0.269, "step": 200 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 0.4524, "step": 220 }, { "epoch": 1.24, "eval_loss": 0.6678915619850159, "eval_runtime": 119.1408, "eval_samples_per_second": 16.787, "eval_steps_per_second": 0.269, "step": 220 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 0.442, "step": 240 }, { "epoch": 1.35, "eval_loss": 0.6705079078674316, "eval_runtime": 119.0664, "eval_samples_per_second": 16.797, "eval_steps_per_second": 0.269, "step": 240 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.4549, "step": 260 }, { "epoch": 1.47, "eval_loss": 0.6721625924110413, "eval_runtime": 119.0399, "eval_samples_per_second": 16.801, "eval_steps_per_second": 0.269, "step": 260 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 0.4514, "step": 280 }, { "epoch": 1.58, "eval_loss": 0.6696570515632629, "eval_runtime": 119.0629, "eval_samples_per_second": 16.798, "eval_steps_per_second": 0.269, "step": 280 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 0.4667, "step": 300 }, { "epoch": 1.69, "eval_loss": 0.6704996228218079, "eval_runtime": 119.0832, "eval_samples_per_second": 16.795, "eval_steps_per_second": 0.269, "step": 300 }, { "epoch": 1.8, "learning_rate": 0.0002, "loss": 0.4575, "step": 320 }, { "epoch": 1.8, "eval_loss": 0.6683193445205688, "eval_runtime": 118.9913, "eval_samples_per_second": 16.808, "eval_steps_per_second": 0.269, "step": 320 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 0.4573, "step": 340 }, { "epoch": 1.92, "eval_loss": 0.6716107726097107, "eval_runtime": 119.0375, "eval_samples_per_second": 16.801, "eval_steps_per_second": 0.269, "step": 340 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 0.4236, "step": 360 }, { "epoch": 2.03, "eval_loss": 0.7066434621810913, "eval_runtime": 119.0223, "eval_samples_per_second": 16.804, "eval_steps_per_second": 0.269, "step": 360 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 0.2833, "step": 380 }, { "epoch": 2.14, "eval_loss": 0.7021411657333374, "eval_runtime": 118.968, "eval_samples_per_second": 16.811, "eval_steps_per_second": 0.269, "step": 380 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 0.2965, "step": 400 }, { "epoch": 2.26, "eval_loss": 0.7043672204017639, "eval_runtime": 119.072, "eval_samples_per_second": 16.797, "eval_steps_per_second": 0.269, "step": 400 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 0.2935, "step": 420 }, { "epoch": 2.37, "eval_loss": 0.7043473124504089, "eval_runtime": 119.032, "eval_samples_per_second": 16.802, "eval_steps_per_second": 0.269, "step": 420 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 0.3019, "step": 440 }, { "epoch": 2.48, "eval_loss": 0.7078304290771484, "eval_runtime": 119.0363, "eval_samples_per_second": 16.802, "eval_steps_per_second": 0.269, "step": 440 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 0.3019, "step": 460 }, { "epoch": 2.59, "eval_loss": 0.7049448490142822, "eval_runtime": 119.086, "eval_samples_per_second": 16.795, "eval_steps_per_second": 0.269, "step": 460 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 0.302, "step": 480 }, { "epoch": 2.71, "eval_loss": 0.7078941464424133, "eval_runtime": 119.0908, "eval_samples_per_second": 16.794, "eval_steps_per_second": 0.269, "step": 480 }, { "epoch": 2.82, "learning_rate": 0.0002, "loss": 0.318, "step": 500 }, { "epoch": 2.82, "eval_loss": 0.7076705694198608, "eval_runtime": 119.1411, "eval_samples_per_second": 16.787, "eval_steps_per_second": 0.269, "step": 500 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.3205, "step": 520 }, { "epoch": 2.93, "eval_loss": 0.7056717276573181, "eval_runtime": 119.1245, "eval_samples_per_second": 16.789, "eval_steps_per_second": 0.269, "step": 520 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 0.2615, "step": 540 }, { "epoch": 3.05, "eval_loss": 0.7545396685600281, "eval_runtime": 119.0492, "eval_samples_per_second": 16.8, "eval_steps_per_second": 0.269, "step": 540 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 0.1976, "step": 560 }, { "epoch": 3.16, "eval_loss": 0.7487218976020813, "eval_runtime": 119.0352, "eval_samples_per_second": 16.802, "eval_steps_per_second": 0.269, "step": 560 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 0.1918, "step": 580 }, { "epoch": 3.27, "eval_loss": 0.7490907907485962, "eval_runtime": 119.0491, "eval_samples_per_second": 16.8, "eval_steps_per_second": 0.269, "step": 580 }, { "epoch": 3.38, "learning_rate": 0.0002, "loss": 0.1928, "step": 600 }, { "epoch": 3.38, "eval_loss": 0.7635614275932312, "eval_runtime": 119.0235, "eval_samples_per_second": 16.803, "eval_steps_per_second": 0.269, "step": 600 }, { "epoch": 3.5, "learning_rate": 0.0002, "loss": 0.2003, "step": 620 }, { "epoch": 3.5, "eval_loss": 0.7481739521026611, "eval_runtime": 119.0638, "eval_samples_per_second": 16.798, "eval_steps_per_second": 0.269, "step": 620 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 0.2067, "step": 640 }, { "epoch": 3.61, "eval_loss": 0.75593101978302, "eval_runtime": 119.0231, "eval_samples_per_second": 16.803, "eval_steps_per_second": 0.269, "step": 640 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 0.2033, "step": 660 }, { "epoch": 3.72, "eval_loss": 0.7634627819061279, "eval_runtime": 119.0637, "eval_samples_per_second": 16.798, "eval_steps_per_second": 0.269, "step": 660 }, { "epoch": 3.84, "learning_rate": 0.0002, "loss": 0.205, "step": 680 }, { "epoch": 3.84, "eval_loss": 0.7572166323661804, "eval_runtime": 119.0532, "eval_samples_per_second": 16.799, "eval_steps_per_second": 0.269, "step": 680 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 0.2084, "step": 700 }, { "epoch": 3.95, "eval_loss": 0.7648661136627197, "eval_runtime": 119.0646, "eval_samples_per_second": 16.798, "eval_steps_per_second": 0.269, "step": 700 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 0.1671, "step": 720 }, { "epoch": 4.06, "eval_loss": 0.7852738499641418, "eval_runtime": 119.0359, "eval_samples_per_second": 16.802, "eval_steps_per_second": 0.269, "step": 720 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 0.1243, "step": 740 }, { "epoch": 4.17, "eval_loss": 0.8128136396408081, "eval_runtime": 119.0666, "eval_samples_per_second": 16.797, "eval_steps_per_second": 0.269, "step": 740 }, { "before_init_mem_cpu": 2218475520, "before_init_mem_gpu": 8489038848, "epoch": 4.23, "init_mem_cpu_alloc_delta": 0, "init_mem_cpu_peaked_delta": 0, "init_mem_gpu_alloc_delta": 0, "init_mem_gpu_peaked_delta": 0, "step": 750, "total_flos": 7.992600445740646e+17, "train_loss": 0.4159141844113668, "train_mem_cpu_alloc_delta": 4575854592, "train_mem_cpu_peaked_delta": 52412416, "train_mem_gpu_alloc_delta": 1833786880, "train_mem_gpu_peaked_delta": 32936068608, "train_runtime": 14741.9518, "train_samples_per_second": 6.512, "train_steps_per_second": 0.051 } ], "max_steps": 750, "num_train_epochs": 5, "total_flos": 7.992600445740646e+17, "trial_name": null, "trial_params": null }