{
  "best_metric": 0.42429444193840027,
  "best_model_checkpoint": "t5/checkpoint-3921268",
  "epoch": 73.0,
  "global_step": 3921268,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 0.00099,
      "loss": 0.6596,
      "step": 53716
    },
    {
      "epoch": 1.0,
      "eval_bleu": 7.691952957568906,
      "eval_loss": 0.5863233804702759,
      "eval_runtime": 6528.3322,
      "eval_samples_per_second": 16.456,
      "eval_steps_per_second": 1.029,
      "step": 53716
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.00098,
      "loss": 0.5807,
      "step": 107432
    },
    {
      "epoch": 2.0,
      "eval_bleu": 7.596172987179689,
      "eval_loss": 0.5534030199050903,
      "eval_runtime": 6601.0537,
      "eval_samples_per_second": 16.275,
      "eval_steps_per_second": 1.017,
      "step": 107432
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.0009699999999999999,
      "loss": 0.5569,
      "step": 161148
    },
    {
      "epoch": 3.0,
      "eval_bleu": 7.149818476141028,
      "eval_loss": 0.538519024848938,
      "eval_runtime": 6606.0069,
      "eval_samples_per_second": 16.263,
      "eval_steps_per_second": 1.016,
      "step": 161148
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.00096,
      "loss": 0.5441,
      "step": 214864
    },
    {
      "epoch": 4.0,
      "eval_bleu": 7.493924944402864,
      "eval_loss": 0.5301510691642761,
      "eval_runtime": 6228.7775,
      "eval_samples_per_second": 17.248,
      "eval_steps_per_second": 1.078,
      "step": 214864
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.00095,
      "loss": 0.5349,
      "step": 268580
    },
    {
      "epoch": 5.0,
      "eval_bleu": 7.031863868448649,
      "eval_loss": 0.5224108695983887,
      "eval_runtime": 6611.0145,
      "eval_samples_per_second": 16.25,
      "eval_steps_per_second": 1.016,
      "step": 268580
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.00094,
      "loss": 0.5281,
      "step": 322296
    },
    {
      "epoch": 6.0,
      "eval_bleu": 7.644179348361122,
      "eval_loss": 0.5192911028862,
      "eval_runtime": 6621.1034,
      "eval_samples_per_second": 16.226,
      "eval_steps_per_second": 1.014,
      "step": 322296
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.00093,
      "loss": 0.5222,
      "step": 376012
    },
    {
      "epoch": 7.0,
      "eval_bleu": 7.607672700840728,
      "eval_loss": 0.5128632187843323,
      "eval_runtime": 6621.733,
      "eval_samples_per_second": 16.224,
      "eval_steps_per_second": 1.014,
      "step": 376012
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.00092,
      "loss": 0.5181,
      "step": 429728
    },
    {
      "epoch": 8.0,
      "eval_bleu": 6.071314840861525,
      "eval_loss": 0.5084598064422607,
      "eval_runtime": 6242.5238,
      "eval_samples_per_second": 17.21,
      "eval_steps_per_second": 1.076,
      "step": 429728
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.00091,
      "loss": 0.5137,
      "step": 483444
    },
    {
      "epoch": 9.0,
      "eval_bleu": 7.175821994303286,
      "eval_loss": 0.5051391124725342,
      "eval_runtime": 6238.6565,
      "eval_samples_per_second": 17.22,
      "eval_steps_per_second": 1.076,
      "step": 483444
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0009000000000000001,
      "loss": 0.5093,
      "step": 537160
    },
    {
      "epoch": 10.0,
      "eval_bleu": 7.716511125290912,
      "eval_loss": 0.5000638961791992,
      "eval_runtime": 6244.5271,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 537160
    },
    {
      "epoch": 11.0,
      "learning_rate": 0.0008900000000000001,
      "loss": 0.5037,
      "step": 590876
    },
    {
      "epoch": 11.0,
      "eval_bleu": 7.072832342346184,
      "eval_loss": 0.4958619177341461,
      "eval_runtime": 6248.1767,
      "eval_samples_per_second": 17.194,
      "eval_steps_per_second": 1.075,
      "step": 590876
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.00088,
      "loss": 0.4992,
      "step": 644592
    },
    {
      "epoch": 12.0,
      "eval_bleu": 7.23951068440794,
      "eval_loss": 0.4918939471244812,
      "eval_runtime": 6240.6707,
      "eval_samples_per_second": 17.215,
      "eval_steps_per_second": 1.076,
      "step": 644592
    },
    {
      "epoch": 13.0,
      "learning_rate": 0.00087,
      "loss": 0.4954,
      "step": 698308
    },
    {
      "epoch": 13.0,
      "eval_bleu": 7.381643836163121,
      "eval_loss": 0.4886699914932251,
      "eval_runtime": 6245.5311,
      "eval_samples_per_second": 17.201,
      "eval_steps_per_second": 1.075,
      "step": 698308
    },
    {
      "epoch": 14.0,
      "learning_rate": 0.00086,
      "loss": 0.4915,
      "step": 752024
    },
    {
      "epoch": 14.0,
      "eval_bleu": 5.9857507598052075,
      "eval_loss": 0.4870322346687317,
      "eval_runtime": 6239.2065,
      "eval_samples_per_second": 17.219,
      "eval_steps_per_second": 1.076,
      "step": 752024
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.00085,
      "loss": 0.488,
      "step": 805740
    },
    {
      "epoch": 15.0,
      "eval_bleu": 7.5727246325090976,
      "eval_loss": 0.4828002154827118,
      "eval_runtime": 6235.1501,
      "eval_samples_per_second": 17.23,
      "eval_steps_per_second": 1.077,
      "step": 805740
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.00084,
      "loss": 0.4862,
      "step": 859456
    },
    {
      "epoch": 16.0,
      "eval_bleu": 7.59567809120864,
      "eval_loss": 0.4813084900379181,
      "eval_runtime": 6235.7161,
      "eval_samples_per_second": 17.228,
      "eval_steps_per_second": 1.077,
      "step": 859456
    },
    {
      "epoch": 17.0,
      "learning_rate": 0.00083,
      "loss": 0.4827,
      "step": 913172
    },
    {
      "epoch": 17.0,
      "eval_bleu": 7.1431546130798385,
      "eval_loss": 0.4796863794326782,
      "eval_runtime": 6236.1656,
      "eval_samples_per_second": 17.227,
      "eval_steps_per_second": 1.077,
      "step": 913172
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.00082,
      "loss": 0.4798,
      "step": 966888
    },
    {
      "epoch": 18.0,
      "eval_bleu": 7.563058401551067,
      "eval_loss": 0.476810485124588,
      "eval_runtime": 6252.4371,
      "eval_samples_per_second": 17.182,
      "eval_steps_per_second": 1.074,
      "step": 966888
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.0008100000000000001,
      "loss": 0.4767,
      "step": 1020604
    },
    {
      "epoch": 19.0,
      "eval_bleu": 7.242193570088235,
      "eval_loss": 0.47421401739120483,
      "eval_runtime": 6234.3401,
      "eval_samples_per_second": 17.232,
      "eval_steps_per_second": 1.077,
      "step": 1020604
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.0008,
      "loss": 0.4748,
      "step": 1074320
    },
    {
      "epoch": 20.0,
      "eval_bleu": 6.120830355327935,
      "eval_loss": 0.47452765703201294,
      "eval_runtime": 6255.864,
      "eval_samples_per_second": 17.173,
      "eval_steps_per_second": 1.073,
      "step": 1074320
    },
    {
      "epoch": 21.0,
      "learning_rate": 0.00079,
      "loss": 0.4735,
      "step": 1128036
    },
    {
      "epoch": 21.0,
      "eval_bleu": 7.641822854665483,
      "eval_loss": 0.4723513424396515,
      "eval_runtime": 6304.5815,
      "eval_samples_per_second": 17.04,
      "eval_steps_per_second": 1.065,
      "step": 1128036
    },
    {
      "epoch": 22.0,
      "learning_rate": 0.0007800000000000001,
      "loss": 0.4716,
      "step": 1181752
    },
    {
      "epoch": 22.0,
      "eval_bleu": 7.3059537693760594,
      "eval_loss": 0.4718638062477112,
      "eval_runtime": 6317.7024,
      "eval_samples_per_second": 17.005,
      "eval_steps_per_second": 1.063,
      "step": 1181752
    },
    {
      "epoch": 23.0,
      "learning_rate": 0.0007700000000000001,
      "loss": 0.469,
      "step": 1235468
    },
    {
      "epoch": 23.0,
      "eval_bleu": 7.598346638071266,
      "eval_loss": 0.46901023387908936,
      "eval_runtime": 6308.4041,
      "eval_samples_per_second": 17.03,
      "eval_steps_per_second": 1.064,
      "step": 1235468
    },
    {
      "epoch": 24.0,
      "learning_rate": 0.00076,
      "loss": 0.4669,
      "step": 1289184
    },
    {
      "epoch": 24.0,
      "eval_bleu": 6.799435285671091,
      "eval_loss": 0.46680623292922974,
      "eval_runtime": 6315.2629,
      "eval_samples_per_second": 17.011,
      "eval_steps_per_second": 1.063,
      "step": 1289184
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.00075,
      "loss": 0.4641,
      "step": 1342900
    },
    {
      "epoch": 25.0,
      "eval_bleu": 7.62775725124654,
      "eval_loss": 0.4666709899902344,
      "eval_runtime": 6262.5243,
      "eval_samples_per_second": 17.155,
      "eval_steps_per_second": 1.072,
      "step": 1342900
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.00074,
      "loss": 0.4618,
      "step": 1396616
    },
    {
      "epoch": 26.0,
      "eval_bleu": 6.969418527447973,
      "eval_loss": 0.4641306698322296,
      "eval_runtime": 6239.5276,
      "eval_samples_per_second": 17.218,
      "eval_steps_per_second": 1.076,
      "step": 1396616
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.00073,
      "loss": 0.4606,
      "step": 1450332
    },
    {
      "epoch": 27.0,
      "eval_bleu": 7.458516781341554,
      "eval_loss": 0.4627404510974884,
      "eval_runtime": 6245.0374,
      "eval_samples_per_second": 17.203,
      "eval_steps_per_second": 1.075,
      "step": 1450332
    },
    {
      "epoch": 28.0,
      "learning_rate": 0.0007199999999999999,
      "loss": 0.4582,
      "step": 1504048
    },
    {
      "epoch": 28.0,
      "eval_bleu": 6.694355343847021,
      "eval_loss": 0.46318283677101135,
      "eval_runtime": 6248.0853,
      "eval_samples_per_second": 17.194,
      "eval_steps_per_second": 1.075,
      "step": 1504048
    },
    {
      "epoch": 29.0,
      "learning_rate": 0.00071,
      "loss": 0.4569,
      "step": 1557764
    },
    {
      "epoch": 29.0,
      "eval_bleu": 7.538791367466209,
      "eval_loss": 0.46087339520454407,
      "eval_runtime": 6241.0141,
      "eval_samples_per_second": 17.214,
      "eval_steps_per_second": 1.076,
      "step": 1557764
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.0007,
      "loss": 0.4548,
      "step": 1611480
    },
    {
      "epoch": 30.0,
      "eval_bleu": 7.528860869957395,
      "eval_loss": 0.4588477909564972,
      "eval_runtime": 6242.3068,
      "eval_samples_per_second": 17.21,
      "eval_steps_per_second": 1.076,
      "step": 1611480
    },
    {
      "epoch": 31.0,
      "learning_rate": 0.00069,
      "loss": 0.4537,
      "step": 1665196
    },
    {
      "epoch": 31.0,
      "eval_bleu": 7.362216478280285,
      "eval_loss": 0.4597391188144684,
      "eval_runtime": 6244.4866,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 1665196
    },
    {
      "epoch": 32.0,
      "learning_rate": 0.00068,
      "loss": 0.4513,
      "step": 1718912
    },
    {
      "epoch": 32.0,
      "eval_bleu": 7.137390175844847,
      "eval_loss": 0.4572164714336395,
      "eval_runtime": 6244.2148,
      "eval_samples_per_second": 17.205,
      "eval_steps_per_second": 1.075,
      "step": 1718912
    },
    {
      "epoch": 33.0,
      "learning_rate": 0.00067,
      "loss": 0.4485,
      "step": 1772628
    },
    {
      "epoch": 33.0,
      "eval_bleu": 7.081305145228205,
      "eval_loss": 0.45658349990844727,
      "eval_runtime": 6241.0726,
      "eval_samples_per_second": 17.214,
      "eval_steps_per_second": 1.076,
      "step": 1772628
    },
    {
      "epoch": 34.0,
      "learning_rate": 0.00066,
      "loss": 0.4469,
      "step": 1826344
    },
    {
      "epoch": 34.0,
      "eval_bleu": 7.065210289724078,
      "eval_loss": 0.4544486701488495,
      "eval_runtime": 6253.2099,
      "eval_samples_per_second": 17.18,
      "eval_steps_per_second": 1.074,
      "step": 1826344
    },
    {
      "epoch": 35.0,
      "learning_rate": 0.0006500000000000001,
      "loss": 0.4449,
      "step": 1880060
    },
    {
      "epoch": 35.0,
      "eval_bleu": 7.378548531953654,
      "eval_loss": 0.4559008777141571,
      "eval_runtime": 6234.4769,
      "eval_samples_per_second": 17.232,
      "eval_steps_per_second": 1.077,
      "step": 1880060
    },
    {
      "epoch": 36.0,
      "learning_rate": 0.00064,
      "loss": 0.4442,
      "step": 1933776
    },
    {
      "epoch": 36.0,
      "eval_bleu": 7.356901577029033,
      "eval_loss": 0.4534740746021271,
      "eval_runtime": 6249.755,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 1933776
    },
    {
      "epoch": 37.0,
      "learning_rate": 0.00063,
      "loss": 0.4431,
      "step": 1987492
    },
    {
      "epoch": 37.0,
      "eval_bleu": 7.175291475992041,
      "eval_loss": 0.45327481627464294,
      "eval_runtime": 6249.6092,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 1987492
    },
    {
      "epoch": 38.0,
      "learning_rate": 0.00062,
      "loss": 0.441,
      "step": 2041208
    },
    {
      "epoch": 38.0,
      "eval_bleu": 7.359022144163392,
      "eval_loss": 0.4524107277393341,
      "eval_runtime": 6240.8973,
      "eval_samples_per_second": 17.214,
      "eval_steps_per_second": 1.076,
      "step": 2041208
    },
    {
      "epoch": 39.0,
      "learning_rate": 0.00061,
      "loss": 0.4387,
      "step": 2094924
    },
    {
      "epoch": 39.0,
      "eval_bleu": 7.5283460331563745,
      "eval_loss": 0.4496091306209564,
      "eval_runtime": 6237.5918,
      "eval_samples_per_second": 17.223,
      "eval_steps_per_second": 1.077,
      "step": 2094924
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.0006,
      "loss": 0.4359,
      "step": 2148640
    },
    {
      "epoch": 40.0,
      "eval_bleu": 7.5346208014087495,
      "eval_loss": 0.44786250591278076,
      "eval_runtime": 6243.0368,
      "eval_samples_per_second": 17.208,
      "eval_steps_per_second": 1.076,
      "step": 2148640
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.00059,
      "loss": 0.4338,
      "step": 2202356
    },
    {
      "epoch": 41.0,
      "eval_bleu": 7.406528761971476,
      "eval_loss": 0.44740021228790283,
      "eval_runtime": 6247.6804,
      "eval_samples_per_second": 17.195,
      "eval_steps_per_second": 1.075,
      "step": 2202356
    },
    {
      "epoch": 42.0,
      "learning_rate": 0.00058,
      "loss": 0.4319,
      "step": 2256072
    },
    {
      "epoch": 42.0,
      "eval_bleu": 7.192159097527976,
      "eval_loss": 0.447433739900589,
      "eval_runtime": 6248.8648,
      "eval_samples_per_second": 17.192,
      "eval_steps_per_second": 1.075,
      "step": 2256072
    },
    {
      "epoch": 43.0,
      "learning_rate": 0.00057,
      "loss": 0.43,
      "step": 2309788
    },
    {
      "epoch": 43.0,
      "eval_bleu": 7.325069602605064,
      "eval_loss": 0.4456492066383362,
      "eval_runtime": 6263.0673,
      "eval_samples_per_second": 17.153,
      "eval_steps_per_second": 1.072,
      "step": 2309788
    },
    {
      "epoch": 44.0,
      "learning_rate": 0.0005600000000000001,
      "loss": 0.4279,
      "step": 2363504
    },
    {
      "epoch": 44.0,
      "eval_bleu": 7.532048814014251,
      "eval_loss": 0.4445250332355499,
      "eval_runtime": 6241.8298,
      "eval_samples_per_second": 17.211,
      "eval_steps_per_second": 1.076,
      "step": 2363504
    },
    {
      "epoch": 45.0,
      "learning_rate": 0.00055,
      "loss": 0.426,
      "step": 2417220
    },
    {
      "epoch": 45.0,
      "eval_bleu": 7.174420155924515,
      "eval_loss": 0.44330111145973206,
      "eval_runtime": 6231.5445,
      "eval_samples_per_second": 17.24,
      "eval_steps_per_second": 1.078,
      "step": 2417220
    },
    {
      "epoch": 46.0,
      "learning_rate": 0.00054,
      "loss": 0.4239,
      "step": 2470936
    },
    {
      "epoch": 46.0,
      "eval_bleu": 7.653281782827262,
      "eval_loss": 0.44130608439445496,
      "eval_runtime": 6244.5962,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 2470936
    },
    {
      "epoch": 47.0,
      "learning_rate": 0.0005300000000000001,
      "loss": 0.422,
      "step": 2524652
    },
    {
      "epoch": 47.0,
      "eval_bleu": 7.358951072022719,
      "eval_loss": 0.4416486620903015,
      "eval_runtime": 6246.4215,
      "eval_samples_per_second": 17.199,
      "eval_steps_per_second": 1.075,
      "step": 2524652
    },
    {
      "epoch": 48.0,
      "learning_rate": 0.0005200000000000001,
      "loss": 0.4206,
      "step": 2578368
    },
    {
      "epoch": 48.0,
      "eval_bleu": 6.700995294592222,
      "eval_loss": 0.441184937953949,
      "eval_runtime": 6248.9024,
      "eval_samples_per_second": 17.192,
      "eval_steps_per_second": 1.075,
      "step": 2578368
    },
    {
      "epoch": 49.0,
      "learning_rate": 0.00051,
      "loss": 0.4186,
      "step": 2632084
    },
    {
      "epoch": 49.0,
      "eval_bleu": 7.428626778422992,
      "eval_loss": 0.44076189398765564,
      "eval_runtime": 6244.5819,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 2632084
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.0005,
      "loss": 0.416,
      "step": 2685800
    },
    {
      "epoch": 50.0,
      "eval_bleu": 7.538386131386865,
      "eval_loss": 0.43902388215065,
      "eval_runtime": 6236.1942,
      "eval_samples_per_second": 17.227,
      "eval_steps_per_second": 1.077,
      "step": 2685800
    },
    {
      "epoch": 51.0,
      "learning_rate": 0.00049,
      "loss": 0.4145,
      "step": 2739516
    },
    {
      "epoch": 51.0,
      "eval_bleu": 7.177849858240658,
      "eval_loss": 0.4388711452484131,
      "eval_runtime": 6244.4397,
      "eval_samples_per_second": 17.204,
      "eval_steps_per_second": 1.075,
      "step": 2739516
    },
    {
      "epoch": 52.0,
      "learning_rate": 0.00048,
      "loss": 0.4115,
      "step": 2793232
    },
    {
      "epoch": 52.0,
      "eval_bleu": 7.3825806146338895,
      "eval_loss": 0.43703773617744446,
      "eval_runtime": 6246.8394,
      "eval_samples_per_second": 17.198,
      "eval_steps_per_second": 1.075,
      "step": 2793232
    },
    {
      "epoch": 53.0,
      "learning_rate": 0.00047,
      "loss": 0.4091,
      "step": 2846948
    },
    {
      "epoch": 53.0,
      "eval_bleu": 7.354983260965792,
      "eval_loss": 0.4351899325847626,
      "eval_runtime": 6249.7526,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 2846948
    },
    {
      "epoch": 54.0,
      "learning_rate": 0.00046,
      "loss": 0.4062,
      "step": 2900664
    },
    {
      "epoch": 54.0,
      "eval_bleu": 7.070865789057057,
      "eval_loss": 0.4349888265132904,
      "eval_runtime": 6255.1992,
      "eval_samples_per_second": 17.175,
      "eval_steps_per_second": 1.074,
      "step": 2900664
    },
    {
      "epoch": 55.0,
      "learning_rate": 0.00045000000000000004,
      "loss": 0.4038,
      "step": 2954380
    },
    {
      "epoch": 55.0,
      "eval_bleu": 7.724805289860729,
      "eval_loss": 0.4359044134616852,
      "eval_runtime": 6250.2651,
      "eval_samples_per_second": 17.188,
      "eval_steps_per_second": 1.074,
      "step": 2954380
    },
    {
      "epoch": 56.0,
      "learning_rate": 0.00044,
      "loss": 0.402,
      "step": 3008096
    },
    {
      "epoch": 56.0,
      "eval_bleu": 7.154898411407466,
      "eval_loss": 0.4326974153518677,
      "eval_runtime": 6264.3019,
      "eval_samples_per_second": 17.15,
      "eval_steps_per_second": 1.072,
      "step": 3008096
    },
    {
      "epoch": 57.0,
      "learning_rate": 0.00043,
      "loss": 0.3995,
      "step": 3061812
    },
    {
      "epoch": 57.0,
      "eval_bleu": 7.508317247767554,
      "eval_loss": 0.4333774149417877,
      "eval_runtime": 6245.5823,
      "eval_samples_per_second": 17.201,
      "eval_steps_per_second": 1.075,
      "step": 3061812
    },
    {
      "epoch": 58.0,
      "learning_rate": 0.00042,
      "loss": 0.3972,
      "step": 3115528
    },
    {
      "epoch": 58.0,
      "eval_bleu": 7.127695021274113,
      "eval_loss": 0.43104425072669983,
      "eval_runtime": 6238.945,
      "eval_samples_per_second": 17.219,
      "eval_steps_per_second": 1.076,
      "step": 3115528
    },
    {
      "epoch": 59.0,
      "learning_rate": 0.00041,
      "loss": 0.3942,
      "step": 3169244
    },
    {
      "epoch": 59.0,
      "eval_bleu": 6.749919689906369,
      "eval_loss": 0.4318625032901764,
      "eval_runtime": 6250.8033,
      "eval_samples_per_second": 17.187,
      "eval_steps_per_second": 1.074,
      "step": 3169244
    },
    {
      "epoch": 60.0,
      "learning_rate": 0.0004,
      "loss": 0.3921,
      "step": 3222960
    },
    {
      "epoch": 60.0,
      "eval_bleu": 7.099626120333918,
      "eval_loss": 0.4313414394855499,
      "eval_runtime": 6233.5496,
      "eval_samples_per_second": 17.234,
      "eval_steps_per_second": 1.077,
      "step": 3222960
    },
    {
      "epoch": 61.0,
      "learning_rate": 0.00039000000000000005,
      "loss": 0.3897,
      "step": 3276676
    },
    {
      "epoch": 61.0,
      "eval_bleu": 7.280842993868327,
      "eval_loss": 0.4297857880592346,
      "eval_runtime": 6255.1519,
      "eval_samples_per_second": 17.175,
      "eval_steps_per_second": 1.074,
      "step": 3276676
    },
    {
      "epoch": 62.0,
      "learning_rate": 0.00038,
      "loss": 0.3867,
      "step": 3330392
    },
    {
      "epoch": 62.0,
      "eval_bleu": 7.328384730172046,
      "eval_loss": 0.42802175879478455,
      "eval_runtime": 6245.3286,
      "eval_samples_per_second": 17.202,
      "eval_steps_per_second": 1.075,
      "step": 3330392
    },
    {
      "epoch": 63.0,
      "learning_rate": 0.00037,
      "loss": 0.3832,
      "step": 3384108
    },
    {
      "epoch": 63.0,
      "eval_bleu": 7.230903636346123,
      "eval_loss": 0.42855262756347656,
      "eval_runtime": 6240.2263,
      "eval_samples_per_second": 17.216,
      "eval_steps_per_second": 1.076,
      "step": 3384108
    },
    {
      "epoch": 64.0,
      "learning_rate": 0.00035999999999999997,
      "loss": 0.3807,
      "step": 3437824
    },
    {
      "epoch": 64.0,
      "eval_bleu": 7.557291062260919,
      "eval_loss": 0.4279802143573761,
      "eval_runtime": 6249.6778,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 3437824
    },
    {
      "epoch": 65.0,
      "learning_rate": 0.00035,
      "loss": 0.3779,
      "step": 3491540
    },
    {
      "epoch": 65.0,
      "eval_bleu": 7.456221414498501,
      "eval_loss": 0.42722874879837036,
      "eval_runtime": 6251.0806,
      "eval_samples_per_second": 17.186,
      "eval_steps_per_second": 1.074,
      "step": 3491540
    },
    {
      "epoch": 66.0,
      "learning_rate": 0.00034,
      "loss": 0.3746,
      "step": 3545256
    },
    {
      "epoch": 66.0,
      "eval_bleu": 7.38246671281172,
      "eval_loss": 0.4264260232448578,
      "eval_runtime": 6254.848,
      "eval_samples_per_second": 17.176,
      "eval_steps_per_second": 1.074,
      "step": 3545256
    },
    {
      "epoch": 67.0,
      "learning_rate": 0.00033,
      "loss": 0.3713,
      "step": 3598972
    },
    {
      "epoch": 67.0,
      "eval_bleu": 7.250094489059249,
      "eval_loss": 0.42612648010253906,
      "eval_runtime": 6253.4328,
      "eval_samples_per_second": 17.18,
      "eval_steps_per_second": 1.074,
      "step": 3598972
    },
    {
      "epoch": 68.0,
      "learning_rate": 0.00032,
      "loss": 0.3679,
      "step": 3652688
    },
    {
      "epoch": 68.0,
      "eval_bleu": 7.24260795309734,
      "eval_loss": 0.42605340480804443,
      "eval_runtime": 6249.7217,
      "eval_samples_per_second": 17.19,
      "eval_steps_per_second": 1.074,
      "step": 3652688
    },
    {
      "epoch": 69.0,
      "learning_rate": 0.00031,
      "loss": 0.3646,
      "step": 3706404
    },
    {
      "epoch": 69.0,
      "eval_bleu": 7.705633206021796,
      "eval_loss": 0.42531710863113403,
      "eval_runtime": 6255.4922,
      "eval_samples_per_second": 17.174,
      "eval_steps_per_second": 1.073,
      "step": 3706404
    },
    {
      "epoch": 70.0,
      "learning_rate": 0.0003,
      "loss": 0.3617,
      "step": 3760120
    },
    {
      "epoch": 70.0,
      "eval_bleu": 7.066333513511338,
      "eval_loss": 0.4245583415031433,
      "eval_runtime": 6242.6393,
      "eval_samples_per_second": 17.209,
      "eval_steps_per_second": 1.076,
      "step": 3760120
    },
    {
      "epoch": 71.0,
      "learning_rate": 0.00029,
      "loss": 0.3576,
      "step": 3813836
    },
    {
      "epoch": 71.0,
      "eval_bleu": 7.453191107022425,
      "eval_loss": 0.4248814582824707,
      "eval_runtime": 6236.6745,
      "eval_samples_per_second": 17.226,
      "eval_steps_per_second": 1.077,
      "step": 3813836
    },
    {
      "epoch": 72.0,
      "learning_rate": 0.00028000000000000003,
      "loss": 0.3538,
      "step": 3867552
    },
    {
      "epoch": 72.0,
      "eval_bleu": 7.399721264841341,
      "eval_loss": 0.42560645937919617,
      "eval_runtime": 6247.7185,
      "eval_samples_per_second": 17.195,
      "eval_steps_per_second": 1.075,
      "step": 3867552
    },
    {
      "epoch": 73.0,
      "learning_rate": 0.00027,
      "loss": 0.3498,
      "step": 3921268
    },
    {
      "epoch": 73.0,
      "eval_bleu": 7.23131596568943,
      "eval_loss": 0.42429444193840027,
      "eval_runtime": 6248.7182,
      "eval_samples_per_second": 17.192,
      "eval_steps_per_second": 1.075,
      "step": 3921268
    }
  ],
  "max_steps": 5371600,
  "num_train_epochs": 100,
  "total_flos": 8.225066520360465e+18,
  "trial_name": null,
  "trial_params": null
}