{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 794, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025188916876574307, "grad_norm": 29.875, "learning_rate": 2.3076923076923078e-07, "loss": 1.1914, "memory/device_mem_reserved(gib)": 80.65, "memory/max_mem_active(gib)": 67.28, "memory/max_mem_allocated(gib)": 67.28, "step": 1 }, { "epoch": 0.005037783375314861, "grad_norm": 26.875, "learning_rate": 4.6153846153846156e-07, "loss": 1.1719, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 2 }, { "epoch": 0.007556675062972292, "grad_norm": 28.0, "learning_rate": 6.923076923076923e-07, "loss": 1.166, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 3 }, { "epoch": 0.010075566750629723, "grad_norm": 61.25, "learning_rate": 9.230769230769231e-07, "loss": 1.6523, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 4 }, { "epoch": 0.012594458438287154, "grad_norm": 7.1875, "learning_rate": 1.1538461538461538e-06, "loss": 1.1035, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 5 }, { "epoch": 0.015113350125944584, "grad_norm": 6.0625, "learning_rate": 1.3846153846153846e-06, "loss": 1.1689, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 6 }, { "epoch": 0.017632241813602016, "grad_norm": 5.03125, "learning_rate": 1.6153846153846154e-06, "loss": 1.0928, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 7 }, { "epoch": 0.020151133501259445, "grad_norm": 9.3125, "learning_rate": 1.8461538461538462e-06, "loss": 1.0908, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 8 }, { "epoch": 0.022670025188916875, "grad_norm": 4.625, "learning_rate": 2.076923076923077e-06, "loss": 1.0566, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 9 }, { "epoch": 0.02518891687657431, "grad_norm": 3.59375, "learning_rate": 2.3076923076923077e-06, "loss": 1.0049, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 10 }, { "epoch": 0.027707808564231738, "grad_norm": 2.40625, "learning_rate": 2.5384615384615385e-06, "loss": 1.0156, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 11 }, { "epoch": 0.030226700251889168, "grad_norm": 2.34375, "learning_rate": 2.7692307692307693e-06, "loss": 0.9209, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 12 }, { "epoch": 0.0327455919395466, "grad_norm": 4.71875, "learning_rate": 3e-06, "loss": 1.1035, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 13 }, { "epoch": 0.03526448362720403, "grad_norm": 3.09375, "learning_rate": 3.230769230769231e-06, "loss": 1.0547, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 14 }, { "epoch": 0.037783375314861464, "grad_norm": 1.765625, "learning_rate": 3.4615384615384617e-06, "loss": 0.9268, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 15 }, { "epoch": 0.04030226700251889, "grad_norm": 2.171875, "learning_rate": 3.6923076923076925e-06, "loss": 1.0215, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 16 }, { "epoch": 0.042821158690176324, "grad_norm": 5.1875, "learning_rate": 3.923076923076923e-06, "loss": 0.9707, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 17 }, { "epoch": 0.04534005037783375, "grad_norm": 4.09375, "learning_rate": 4.153846153846154e-06, "loss": 0.959, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 18 }, { "epoch": 0.04785894206549118, "grad_norm": 3.421875, "learning_rate": 4.384615384615385e-06, "loss": 1.0137, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 19 }, { "epoch": 0.05037783375314862, "grad_norm": 2.71875, "learning_rate": 4.615384615384615e-06, "loss": 1.0137, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 20 }, { "epoch": 0.05289672544080604, "grad_norm": 1.6796875, "learning_rate": 4.8461538461538465e-06, "loss": 0.9062, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 21 }, { "epoch": 0.055415617128463476, "grad_norm": 2.109375, "learning_rate": 5.076923076923077e-06, "loss": 0.875, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 22 }, { "epoch": 0.05793450881612091, "grad_norm": 2.609375, "learning_rate": 5.307692307692308e-06, "loss": 0.9893, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 23 }, { "epoch": 0.060453400503778336, "grad_norm": 5.71875, "learning_rate": 5.5384615384615385e-06, "loss": 1.0166, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 24 }, { "epoch": 0.06297229219143577, "grad_norm": 1.640625, "learning_rate": 5.769230769230769e-06, "loss": 0.9834, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 25 }, { "epoch": 0.0654911838790932, "grad_norm": 1.390625, "learning_rate": 6e-06, "loss": 0.8379, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 26 }, { "epoch": 0.06801007556675064, "grad_norm": 1.4765625, "learning_rate": 6.2307692307692305e-06, "loss": 0.9717, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 27 }, { "epoch": 0.07052896725440806, "grad_norm": 1.453125, "learning_rate": 6.461538461538462e-06, "loss": 0.873, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 28 }, { "epoch": 0.07304785894206549, "grad_norm": 1.40625, "learning_rate": 6.692307692307692e-06, "loss": 0.9424, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 29 }, { "epoch": 0.07556675062972293, "grad_norm": 2.21875, "learning_rate": 6.923076923076923e-06, "loss": 0.9629, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 30 }, { "epoch": 0.07808564231738035, "grad_norm": 1.6875, "learning_rate": 7.153846153846154e-06, "loss": 0.8477, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 31 }, { "epoch": 0.08060453400503778, "grad_norm": 1.390625, "learning_rate": 7.384615384615385e-06, "loss": 0.9854, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 32 }, { "epoch": 0.08312342569269521, "grad_norm": 1.609375, "learning_rate": 7.615384615384615e-06, "loss": 0.9248, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 33 }, { "epoch": 0.08564231738035265, "grad_norm": 1.3828125, "learning_rate": 7.846153846153847e-06, "loss": 0.9873, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 34 }, { "epoch": 0.08816120906801007, "grad_norm": 1.2890625, "learning_rate": 8.076923076923077e-06, "loss": 0.8662, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 35 }, { "epoch": 0.0906801007556675, "grad_norm": 1.984375, "learning_rate": 8.307692307692307e-06, "loss": 0.835, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 36 }, { "epoch": 0.09319899244332494, "grad_norm": 1.546875, "learning_rate": 8.53846153846154e-06, "loss": 0.9141, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 37 }, { "epoch": 0.09571788413098237, "grad_norm": 1.453125, "learning_rate": 8.76923076923077e-06, "loss": 0.9336, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 38 }, { "epoch": 0.0982367758186398, "grad_norm": 1.3515625, "learning_rate": 9e-06, "loss": 0.959, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 39 }, { "epoch": 0.10075566750629723, "grad_norm": 1.3828125, "learning_rate": 8.998925871900279e-06, "loss": 0.8184, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 40 }, { "epoch": 0.10327455919395466, "grad_norm": 1.4765625, "learning_rate": 8.997849176845461e-06, "loss": 0.9004, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 41 }, { "epoch": 0.10579345088161209, "grad_norm": 2.1875, "learning_rate": 8.996769905622757e-06, "loss": 0.9365, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 42 }, { "epoch": 0.10831234256926953, "grad_norm": 1.6953125, "learning_rate": 8.995688048975247e-06, "loss": 0.9414, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 43 }, { "epoch": 0.11083123425692695, "grad_norm": 2.234375, "learning_rate": 8.994603597601599e-06, "loss": 1.0234, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 44 }, { "epoch": 0.11335012594458438, "grad_norm": 1.390625, "learning_rate": 8.993516542155818e-06, "loss": 0.8896, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 45 }, { "epoch": 0.11586901763224182, "grad_norm": 1.1328125, "learning_rate": 8.992426873246962e-06, "loss": 0.8965, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 46 }, { "epoch": 0.11838790931989925, "grad_norm": 1.703125, "learning_rate": 8.991334581438888e-06, "loss": 0.8301, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 47 }, { "epoch": 0.12090680100755667, "grad_norm": 1.875, "learning_rate": 8.990239657249966e-06, "loss": 0.8691, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 48 }, { "epoch": 0.12342569269521411, "grad_norm": 1.4375, "learning_rate": 8.989142091152815e-06, "loss": 0.9229, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 49 }, { "epoch": 0.12594458438287154, "grad_norm": 1.390625, "learning_rate": 8.988041873574018e-06, "loss": 0.9941, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 50 }, { "epoch": 0.12846347607052896, "grad_norm": 1.5625, "learning_rate": 8.986938994893847e-06, "loss": 0.8838, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 51 }, { "epoch": 0.1309823677581864, "grad_norm": 1.234375, "learning_rate": 8.985833445445984e-06, "loss": 0.8643, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 52 }, { "epoch": 0.13350125944584382, "grad_norm": 1.515625, "learning_rate": 8.984725215517241e-06, "loss": 0.9756, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 53 }, { "epoch": 0.13602015113350127, "grad_norm": 1.453125, "learning_rate": 8.98361429534727e-06, "loss": 0.8311, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 54 }, { "epoch": 0.1385390428211587, "grad_norm": 1.46875, "learning_rate": 8.982500675128276e-06, "loss": 0.8545, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 55 }, { "epoch": 0.14105793450881612, "grad_norm": 1.328125, "learning_rate": 8.981384345004732e-06, "loss": 0.9189, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 56 }, { "epoch": 0.14357682619647355, "grad_norm": 1.5078125, "learning_rate": 8.980265295073092e-06, "loss": 0.8623, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 57 }, { "epoch": 0.14609571788413098, "grad_norm": 1.203125, "learning_rate": 8.979143515381489e-06, "loss": 0.8184, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 58 }, { "epoch": 0.1486146095717884, "grad_norm": 1.453125, "learning_rate": 8.978018995929444e-06, "loss": 0.8418, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 59 }, { "epoch": 0.15113350125944586, "grad_norm": 1.1953125, "learning_rate": 8.976891726667572e-06, "loss": 0.9297, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 60 }, { "epoch": 0.15365239294710328, "grad_norm": 1.1953125, "learning_rate": 8.97576169749728e-06, "loss": 0.8945, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 61 }, { "epoch": 0.1561712846347607, "grad_norm": 1.578125, "learning_rate": 8.974628898270462e-06, "loss": 0.9258, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 62 }, { "epoch": 0.15869017632241814, "grad_norm": 1.234375, "learning_rate": 8.9734933187892e-06, "loss": 0.8389, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 63 }, { "epoch": 0.16120906801007556, "grad_norm": 1.1640625, "learning_rate": 8.97235494880546e-06, "loss": 0.8242, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 64 }, { "epoch": 0.163727959697733, "grad_norm": 2.734375, "learning_rate": 8.971213778020776e-06, "loss": 0.876, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 65 }, { "epoch": 0.16624685138539042, "grad_norm": 1.453125, "learning_rate": 8.970069796085946e-06, "loss": 0.9004, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 66 }, { "epoch": 0.16876574307304787, "grad_norm": 1.7421875, "learning_rate": 8.968922992600714e-06, "loss": 0.8809, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 67 }, { "epoch": 0.1712846347607053, "grad_norm": 1.390625, "learning_rate": 8.96777335711346e-06, "loss": 0.8721, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 68 }, { "epoch": 0.17380352644836272, "grad_norm": 1.5078125, "learning_rate": 8.966620879120879e-06, "loss": 0.8545, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 69 }, { "epoch": 0.17632241813602015, "grad_norm": 1.5859375, "learning_rate": 8.965465548067666e-06, "loss": 0.9199, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 70 }, { "epoch": 0.17884130982367757, "grad_norm": 1.234375, "learning_rate": 8.964307353346186e-06, "loss": 0.8467, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 71 }, { "epoch": 0.181360201511335, "grad_norm": 1.234375, "learning_rate": 8.963146284296154e-06, "loss": 0.9258, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 72 }, { "epoch": 0.18387909319899245, "grad_norm": 1.3125, "learning_rate": 8.961982330204308e-06, "loss": 0.8691, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 73 }, { "epoch": 0.18639798488664988, "grad_norm": 1.3046875, "learning_rate": 8.960815480304078e-06, "loss": 0.7891, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 74 }, { "epoch": 0.1889168765743073, "grad_norm": 1.3828125, "learning_rate": 8.959645723775257e-06, "loss": 0.8916, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 75 }, { "epoch": 0.19143576826196473, "grad_norm": 1.3515625, "learning_rate": 8.958473049743662e-06, "loss": 0.9062, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 76 }, { "epoch": 0.19395465994962216, "grad_norm": 1.21875, "learning_rate": 8.9572974472808e-06, "loss": 0.8076, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 77 }, { "epoch": 0.1964735516372796, "grad_norm": 1.2421875, "learning_rate": 8.956118905403529e-06, "loss": 0.874, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 78 }, { "epoch": 0.19899244332493704, "grad_norm": 1.3828125, "learning_rate": 8.954937413073714e-06, "loss": 0.9102, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 79 }, { "epoch": 0.20151133501259447, "grad_norm": 1.25, "learning_rate": 8.953752959197885e-06, "loss": 0.8408, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 80 }, { "epoch": 0.2040302267002519, "grad_norm": 1.3828125, "learning_rate": 8.952565532626881e-06, "loss": 0.958, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 81 }, { "epoch": 0.20654911838790932, "grad_norm": 1.421875, "learning_rate": 8.951375122155523e-06, "loss": 0.9668, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 82 }, { "epoch": 0.20906801007556675, "grad_norm": 1.2890625, "learning_rate": 8.950181716522227e-06, "loss": 0.8779, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 83 }, { "epoch": 0.21158690176322417, "grad_norm": 1.1796875, "learning_rate": 8.948985304408678e-06, "loss": 0.8057, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 84 }, { "epoch": 0.2141057934508816, "grad_norm": 1.203125, "learning_rate": 8.947785874439462e-06, "loss": 0.8672, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 85 }, { "epoch": 0.21662468513853905, "grad_norm": 1.5078125, "learning_rate": 8.946583415181705e-06, "loss": 0.8369, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 86 }, { "epoch": 0.21914357682619648, "grad_norm": 1.2265625, "learning_rate": 8.945377915144704e-06, "loss": 0.8311, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 87 }, { "epoch": 0.2216624685138539, "grad_norm": 1.1484375, "learning_rate": 8.944169362779576e-06, "loss": 0.8066, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 88 }, { "epoch": 0.22418136020151133, "grad_norm": 1.2109375, "learning_rate": 8.942957746478874e-06, "loss": 0.8584, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 89 }, { "epoch": 0.22670025188916876, "grad_norm": 1.203125, "learning_rate": 8.941743054576224e-06, "loss": 0.8223, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 90 }, { "epoch": 0.22921914357682618, "grad_norm": 1.1015625, "learning_rate": 8.940525275345949e-06, "loss": 0.8828, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 91 }, { "epoch": 0.23173803526448364, "grad_norm": 2.3125, "learning_rate": 8.939304397002686e-06, "loss": 0.9102, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 92 }, { "epoch": 0.23425692695214106, "grad_norm": 1.453125, "learning_rate": 8.938080407701019e-06, "loss": 0.8379, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 93 }, { "epoch": 0.2367758186397985, "grad_norm": 1.2421875, "learning_rate": 8.936853295535081e-06, "loss": 0.7842, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 94 }, { "epoch": 0.23929471032745592, "grad_norm": 1.2578125, "learning_rate": 8.935623048538179e-06, "loss": 0.7998, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 95 }, { "epoch": 0.24181360201511334, "grad_norm": 1.1796875, "learning_rate": 8.934389654682394e-06, "loss": 0.8838, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 96 }, { "epoch": 0.24433249370277077, "grad_norm": 1.1796875, "learning_rate": 8.933153101878202e-06, "loss": 0.8379, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 97 }, { "epoch": 0.24685138539042822, "grad_norm": 1.3828125, "learning_rate": 8.93191337797407e-06, "loss": 0.8896, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 98 }, { "epoch": 0.24937027707808565, "grad_norm": 1.1640625, "learning_rate": 8.930670470756064e-06, "loss": 0.8779, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 99 }, { "epoch": 0.2518891687657431, "grad_norm": 2.21875, "learning_rate": 8.929424367947436e-06, "loss": 0.8799, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 100 }, { "epoch": 0.25440806045340053, "grad_norm": 1.3671875, "learning_rate": 8.928175057208238e-06, "loss": 0.7812, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 101 }, { "epoch": 0.25692695214105793, "grad_norm": 1.5546875, "learning_rate": 8.926922526134899e-06, "loss": 0.7754, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 102 }, { "epoch": 0.2594458438287154, "grad_norm": 1.3359375, "learning_rate": 8.925666762259823e-06, "loss": 0.832, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 103 }, { "epoch": 0.2619647355163728, "grad_norm": 1.359375, "learning_rate": 8.924407753050969e-06, "loss": 0.8779, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 104 }, { "epoch": 0.26448362720403024, "grad_norm": 1.515625, "learning_rate": 8.923145485911444e-06, "loss": 0.7803, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 105 }, { "epoch": 0.26700251889168763, "grad_norm": 1.328125, "learning_rate": 8.92187994817907e-06, "loss": 0.9385, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 106 }, { "epoch": 0.2695214105793451, "grad_norm": 1.2421875, "learning_rate": 8.920611127125973e-06, "loss": 0.9365, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 107 }, { "epoch": 0.27204030226700254, "grad_norm": 1.234375, "learning_rate": 8.919339009958147e-06, "loss": 0.8379, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 108 }, { "epoch": 0.27455919395465994, "grad_norm": 1.1796875, "learning_rate": 8.918063583815029e-06, "loss": 0.8555, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 109 }, { "epoch": 0.2770780856423174, "grad_norm": 1.34375, "learning_rate": 8.916784835769064e-06, "loss": 0.9082, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 110 }, { "epoch": 0.2795969773299748, "grad_norm": 1.3046875, "learning_rate": 8.915502752825269e-06, "loss": 0.8721, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 111 }, { "epoch": 0.28211586901763225, "grad_norm": 1.1953125, "learning_rate": 8.914217321920789e-06, "loss": 0.8408, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 112 }, { "epoch": 0.28463476070528965, "grad_norm": 1.2421875, "learning_rate": 8.912928529924463e-06, "loss": 0.8203, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 113 }, { "epoch": 0.2871536523929471, "grad_norm": 1.734375, "learning_rate": 8.911636363636363e-06, "loss": 0.8457, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 114 }, { "epoch": 0.28967254408060455, "grad_norm": 1.3359375, "learning_rate": 8.910340809787358e-06, "loss": 0.8301, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 115 }, { "epoch": 0.29219143576826195, "grad_norm": 1.3203125, "learning_rate": 8.909041855038646e-06, "loss": 0.8018, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 116 }, { "epoch": 0.2947103274559194, "grad_norm": 1.2265625, "learning_rate": 8.907739485981309e-06, "loss": 0.7891, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 117 }, { "epoch": 0.2972292191435768, "grad_norm": 1.1015625, "learning_rate": 8.906433689135838e-06, "loss": 0.8418, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 118 }, { "epoch": 0.29974811083123426, "grad_norm": 1.453125, "learning_rate": 8.905124450951684e-06, "loss": 0.8604, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 119 }, { "epoch": 0.3022670025188917, "grad_norm": 2.21875, "learning_rate": 8.903811757806774e-06, "loss": 0.8984, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 120 }, { "epoch": 0.3047858942065491, "grad_norm": 1.5234375, "learning_rate": 8.902495596007047e-06, "loss": 0.9189, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 121 }, { "epoch": 0.30730478589420657, "grad_norm": 1.3515625, "learning_rate": 8.901175951785976e-06, "loss": 0.8018, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 122 }, { "epoch": 0.30982367758186397, "grad_norm": 1.265625, "learning_rate": 8.899852811304093e-06, "loss": 0.8066, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 123 }, { "epoch": 0.3123425692695214, "grad_norm": 1.171875, "learning_rate": 8.89852616064849e-06, "loss": 0.8506, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 124 }, { "epoch": 0.3148614609571788, "grad_norm": 1.0625, "learning_rate": 8.89719598583235e-06, "loss": 0.7969, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 125 }, { "epoch": 0.31738035264483627, "grad_norm": 1.421875, "learning_rate": 8.895862272794443e-06, "loss": 0.792, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 126 }, { "epoch": 0.3198992443324937, "grad_norm": 1.1171875, "learning_rate": 8.894525007398639e-06, "loss": 0.8096, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 127 }, { "epoch": 0.3224181360201511, "grad_norm": 1.2265625, "learning_rate": 8.893184175433398e-06, "loss": 0.7803, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 128 }, { "epoch": 0.3249370277078086, "grad_norm": 1.4453125, "learning_rate": 8.891839762611276e-06, "loss": 0.8662, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 129 }, { "epoch": 0.327455919395466, "grad_norm": 1.46875, "learning_rate": 8.890491754568414e-06, "loss": 0.7393, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 130 }, { "epoch": 0.32997481108312343, "grad_norm": 1.1875, "learning_rate": 8.889140136864028e-06, "loss": 0.7891, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 131 }, { "epoch": 0.33249370277078083, "grad_norm": 1.15625, "learning_rate": 8.88778489497989e-06, "loss": 0.9453, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 132 }, { "epoch": 0.3350125944584383, "grad_norm": 1.21875, "learning_rate": 8.88642601431981e-06, "loss": 0.7617, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 133 }, { "epoch": 0.33753148614609574, "grad_norm": 1.15625, "learning_rate": 8.88506348020911e-06, "loss": 0.8564, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 134 }, { "epoch": 0.34005037783375314, "grad_norm": 1.296875, "learning_rate": 8.883697277894106e-06, "loss": 0.8047, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 135 }, { "epoch": 0.3425692695214106, "grad_norm": 1.3046875, "learning_rate": 8.882327392541561e-06, "loss": 0.8721, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 136 }, { "epoch": 0.345088161209068, "grad_norm": 1.3046875, "learning_rate": 8.880953809238152e-06, "loss": 0.8652, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 137 }, { "epoch": 0.34760705289672544, "grad_norm": 1.3515625, "learning_rate": 8.879576512989938e-06, "loss": 0.8438, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 138 }, { "epoch": 0.3501259445843829, "grad_norm": 1.1015625, "learning_rate": 8.878195488721804e-06, "loss": 0.8008, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 139 }, { "epoch": 0.3526448362720403, "grad_norm": 1.53125, "learning_rate": 8.876810721276917e-06, "loss": 0.9053, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 140 }, { "epoch": 0.35516372795969775, "grad_norm": 1.3359375, "learning_rate": 8.875422195416164e-06, "loss": 0.8672, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 141 }, { "epoch": 0.35768261964735515, "grad_norm": 1.2265625, "learning_rate": 8.874029895817606e-06, "loss": 0.8008, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 142 }, { "epoch": 0.3602015113350126, "grad_norm": 1.1953125, "learning_rate": 8.8726338070759e-06, "loss": 0.7822, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 143 }, { "epoch": 0.36272040302267, "grad_norm": 1.140625, "learning_rate": 8.871233913701741e-06, "loss": 0.7842, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 144 }, { "epoch": 0.36523929471032746, "grad_norm": 1.1953125, "learning_rate": 8.869830200121285e-06, "loss": 0.7891, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 145 }, { "epoch": 0.3677581863979849, "grad_norm": 1.125, "learning_rate": 8.868422650675573e-06, "loss": 0.8408, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 146 }, { "epoch": 0.3702770780856423, "grad_norm": 1.2578125, "learning_rate": 8.867011249619946e-06, "loss": 0.8447, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 147 }, { "epoch": 0.37279596977329976, "grad_norm": 1.2421875, "learning_rate": 8.865595981123458e-06, "loss": 0.8613, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 148 }, { "epoch": 0.37531486146095716, "grad_norm": 1.4375, "learning_rate": 8.864176829268293e-06, "loss": 0.79, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 149 }, { "epoch": 0.3778337531486146, "grad_norm": 1.2578125, "learning_rate": 8.862753778049153e-06, "loss": 0.7529, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 150 }, { "epoch": 0.380352644836272, "grad_norm": 1.328125, "learning_rate": 8.86132681137267e-06, "loss": 0.8779, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 151 }, { "epoch": 0.38287153652392947, "grad_norm": 1.2890625, "learning_rate": 8.859895913056788e-06, "loss": 0.8652, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 152 }, { "epoch": 0.3853904282115869, "grad_norm": 1.265625, "learning_rate": 8.858461066830166e-06, "loss": 0.7529, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 153 }, { "epoch": 0.3879093198992443, "grad_norm": 1.359375, "learning_rate": 8.857022256331542e-06, "loss": 0.8027, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 154 }, { "epoch": 0.3904282115869018, "grad_norm": 1.40625, "learning_rate": 8.85557946510913e-06, "loss": 0.833, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 155 }, { "epoch": 0.3929471032745592, "grad_norm": 1.1328125, "learning_rate": 8.854132676619979e-06, "loss": 0.8008, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 156 }, { "epoch": 0.3954659949622166, "grad_norm": 1.1796875, "learning_rate": 8.852681874229348e-06, "loss": 0.8594, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 157 }, { "epoch": 0.3979848866498741, "grad_norm": 1.140625, "learning_rate": 8.851227041210064e-06, "loss": 0.7725, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 158 }, { "epoch": 0.4005037783375315, "grad_norm": 1.3828125, "learning_rate": 8.849768160741887e-06, "loss": 0.9307, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 159 }, { "epoch": 0.40302267002518893, "grad_norm": 1.296875, "learning_rate": 8.84830521591085e-06, "loss": 0.8184, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 160 }, { "epoch": 0.40554156171284633, "grad_norm": 1.0625, "learning_rate": 8.846838189708618e-06, "loss": 0.8447, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 161 }, { "epoch": 0.4080604534005038, "grad_norm": 1.1484375, "learning_rate": 8.845367065031817e-06, "loss": 0.8145, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 162 }, { "epoch": 0.4105793450881612, "grad_norm": 1.3671875, "learning_rate": 8.843891824681381e-06, "loss": 0.8037, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 163 }, { "epoch": 0.41309823677581864, "grad_norm": 1.125, "learning_rate": 8.842412451361868e-06, "loss": 0.832, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 164 }, { "epoch": 0.4156171284634761, "grad_norm": 1.1484375, "learning_rate": 8.840928927680798e-06, "loss": 0.8184, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 165 }, { "epoch": 0.4181360201511335, "grad_norm": 1.171875, "learning_rate": 8.839441236147964e-06, "loss": 0.8506, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 166 }, { "epoch": 0.42065491183879095, "grad_norm": 1.140625, "learning_rate": 8.837949359174743e-06, "loss": 0.8154, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 167 }, { "epoch": 0.42317380352644834, "grad_norm": 1.1015625, "learning_rate": 8.836453279073408e-06, "loss": 0.7705, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 168 }, { "epoch": 0.4256926952141058, "grad_norm": 1.34375, "learning_rate": 8.834952978056426e-06, "loss": 0.8115, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 169 }, { "epoch": 0.4282115869017632, "grad_norm": 1.421875, "learning_rate": 8.833448438235755e-06, "loss": 0.8203, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 170 }, { "epoch": 0.43073047858942065, "grad_norm": 1.25, "learning_rate": 8.831939641622132e-06, "loss": 0.7988, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 171 }, { "epoch": 0.4332493702770781, "grad_norm": 1.203125, "learning_rate": 8.83042657012435e-06, "loss": 0.7988, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 172 }, { "epoch": 0.4357682619647355, "grad_norm": 1.15625, "learning_rate": 8.82890920554855e-06, "loss": 0.8359, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 173 }, { "epoch": 0.43828715365239296, "grad_norm": 1.3515625, "learning_rate": 8.827387529597475e-06, "loss": 0.8623, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 174 }, { "epoch": 0.44080604534005036, "grad_norm": 1.4453125, "learning_rate": 8.825861523869745e-06, "loss": 0.8232, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 175 }, { "epoch": 0.4433249370277078, "grad_norm": 1.2734375, "learning_rate": 8.824331169859111e-06, "loss": 0.7861, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 176 }, { "epoch": 0.44584382871536526, "grad_norm": 1.21875, "learning_rate": 8.82279644895371e-06, "loss": 0.7861, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 177 }, { "epoch": 0.44836272040302266, "grad_norm": 1.3671875, "learning_rate": 8.821257342435307e-06, "loss": 0.7646, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 178 }, { "epoch": 0.4508816120906801, "grad_norm": 1.2734375, "learning_rate": 8.819713831478538e-06, "loss": 0.7959, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 179 }, { "epoch": 0.4534005037783375, "grad_norm": 1.4140625, "learning_rate": 8.818165897150136e-06, "loss": 0.8311, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 180 }, { "epoch": 0.45591939546599497, "grad_norm": 1.0859375, "learning_rate": 8.816613520408164e-06, "loss": 0.748, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 181 }, { "epoch": 0.45843828715365237, "grad_norm": 1.5625, "learning_rate": 8.815056682101229e-06, "loss": 0.8076, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 182 }, { "epoch": 0.4609571788413098, "grad_norm": 1.28125, "learning_rate": 8.8134953629677e-06, "loss": 0.7988, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 183 }, { "epoch": 0.4634760705289673, "grad_norm": 1.3671875, "learning_rate": 8.81192954363491e-06, "loss": 0.8828, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 184 }, { "epoch": 0.4659949622166247, "grad_norm": 1.234375, "learning_rate": 8.810359204618346e-06, "loss": 0.8242, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 185 }, { "epoch": 0.46851385390428213, "grad_norm": 1.640625, "learning_rate": 8.808784326320862e-06, "loss": 0.7812, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 186 }, { "epoch": 0.47103274559193953, "grad_norm": 1.0859375, "learning_rate": 8.807204889031843e-06, "loss": 0.7666, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 187 }, { "epoch": 0.473551637279597, "grad_norm": 1.296875, "learning_rate": 8.805620872926398e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 188 }, { "epoch": 0.4760705289672544, "grad_norm": 1.046875, "learning_rate": 8.804032258064517e-06, "loss": 0.7295, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 189 }, { "epoch": 0.47858942065491183, "grad_norm": 1.3125, "learning_rate": 8.802439024390243e-06, "loss": 0.8799, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 190 }, { "epoch": 0.4811083123425693, "grad_norm": 1.140625, "learning_rate": 8.80084115173083e-06, "loss": 0.7549, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 191 }, { "epoch": 0.4836272040302267, "grad_norm": 1.265625, "learning_rate": 8.799238619795886e-06, "loss": 0.8633, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 192 }, { "epoch": 0.48614609571788414, "grad_norm": 1.109375, "learning_rate": 8.79763140817651e-06, "loss": 0.8408, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 193 }, { "epoch": 0.48866498740554154, "grad_norm": 1.2109375, "learning_rate": 8.796019496344436e-06, "loss": 0.8496, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 194 }, { "epoch": 0.491183879093199, "grad_norm": 1.671875, "learning_rate": 8.794402863651155e-06, "loss": 0.833, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 195 }, { "epoch": 0.49370277078085645, "grad_norm": 1.421875, "learning_rate": 8.792781489327033e-06, "loss": 0.8457, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 196 }, { "epoch": 0.49622166246851385, "grad_norm": 1.109375, "learning_rate": 8.791155352480418e-06, "loss": 0.7988, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 197 }, { "epoch": 0.4987405541561713, "grad_norm": 1.2265625, "learning_rate": 8.789524432096748e-06, "loss": 0.8125, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 198 }, { "epoch": 0.5012594458438288, "grad_norm": 1.1953125, "learning_rate": 8.787888707037644e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 199 }, { "epoch": 0.5037783375314862, "grad_norm": 1.2109375, "learning_rate": 8.786248156039995e-06, "loss": 0.8047, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 200 }, { "epoch": 0.5062972292191436, "grad_norm": 1.140625, "learning_rate": 8.784602757715037e-06, "loss": 0.8418, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 201 }, { "epoch": 0.5088161209068011, "grad_norm": 1.3125, "learning_rate": 8.782952490547428e-06, "loss": 0.8369, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 202 }, { "epoch": 0.5113350125944585, "grad_norm": 2.515625, "learning_rate": 8.781297332894304e-06, "loss": 0.79, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 203 }, { "epoch": 0.5138539042821159, "grad_norm": 1.4453125, "learning_rate": 8.779637262984337e-06, "loss": 0.8691, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 204 }, { "epoch": 0.5163727959697733, "grad_norm": 1.4765625, "learning_rate": 8.777972258916777e-06, "loss": 0.8809, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 205 }, { "epoch": 0.5188916876574308, "grad_norm": 1.2421875, "learning_rate": 8.776302298660493e-06, "loss": 0.8086, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 206 }, { "epoch": 0.5214105793450882, "grad_norm": 1.1484375, "learning_rate": 8.774627360052997e-06, "loss": 0.7715, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 207 }, { "epoch": 0.5239294710327456, "grad_norm": 1.2734375, "learning_rate": 8.77294742079947e-06, "loss": 0.8145, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 208 }, { "epoch": 0.5264483627204031, "grad_norm": 1.34375, "learning_rate": 8.771262458471761e-06, "loss": 0.7949, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 209 }, { "epoch": 0.5289672544080605, "grad_norm": 1.3671875, "learning_rate": 8.769572450507402e-06, "loss": 0.75, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 210 }, { "epoch": 0.5314861460957179, "grad_norm": 1.203125, "learning_rate": 8.767877374208597e-06, "loss": 0.8477, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 211 }, { "epoch": 0.5340050377833753, "grad_norm": 1.171875, "learning_rate": 8.7661772067412e-06, "loss": 0.8154, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 212 }, { "epoch": 0.5365239294710328, "grad_norm": 1.125, "learning_rate": 8.76447192513369e-06, "loss": 0.7559, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 213 }, { "epoch": 0.5390428211586902, "grad_norm": 1.2265625, "learning_rate": 8.76276150627615e-06, "loss": 0.8047, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 214 }, { "epoch": 0.5415617128463476, "grad_norm": 1.1015625, "learning_rate": 8.761045926919207e-06, "loss": 0.792, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 215 }, { "epoch": 0.5440806045340051, "grad_norm": 1.1640625, "learning_rate": 8.75932516367299e-06, "loss": 0.7939, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 216 }, { "epoch": 0.5465994962216625, "grad_norm": 1.21875, "learning_rate": 8.757599193006053e-06, "loss": 0.7637, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 217 }, { "epoch": 0.5491183879093199, "grad_norm": 1.2421875, "learning_rate": 8.755867991244318e-06, "loss": 0.7939, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 218 }, { "epoch": 0.5516372795969773, "grad_norm": 1.0546875, "learning_rate": 8.754131534569984e-06, "loss": 0.7812, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 219 }, { "epoch": 0.5541561712846348, "grad_norm": 1.203125, "learning_rate": 8.752389799020436e-06, "loss": 0.7695, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 220 }, { "epoch": 0.5566750629722922, "grad_norm": 1.2265625, "learning_rate": 8.750642760487145e-06, "loss": 0.8857, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 221 }, { "epoch": 0.5591939546599496, "grad_norm": 1.1953125, "learning_rate": 8.748890394714553e-06, "loss": 0.8574, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 222 }, { "epoch": 0.5617128463476071, "grad_norm": 1.125, "learning_rate": 8.747132677298948e-06, "loss": 0.8193, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 223 }, { "epoch": 0.5642317380352645, "grad_norm": 1.0234375, "learning_rate": 8.74536958368734e-06, "loss": 0.7441, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 224 }, { "epoch": 0.5667506297229219, "grad_norm": 1.203125, "learning_rate": 8.74360108917631e-06, "loss": 0.792, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 225 }, { "epoch": 0.5692695214105793, "grad_norm": 1.28125, "learning_rate": 8.741827168910857e-06, "loss": 0.8105, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 226 }, { "epoch": 0.5717884130982368, "grad_norm": 1.0625, "learning_rate": 8.740047797883237e-06, "loss": 0.8115, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 227 }, { "epoch": 0.5743073047858942, "grad_norm": 1.140625, "learning_rate": 8.738262950931784e-06, "loss": 0.7998, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 228 }, { "epoch": 0.5768261964735516, "grad_norm": 1.234375, "learning_rate": 8.736472602739726e-06, "loss": 0.7686, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 229 }, { "epoch": 0.5793450881612091, "grad_norm": 1.203125, "learning_rate": 8.73467672783399e-06, "loss": 0.7959, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 230 }, { "epoch": 0.5818639798488665, "grad_norm": 1.1796875, "learning_rate": 8.73287530058399e-06, "loss": 0.6982, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 231 }, { "epoch": 0.5843828715365239, "grad_norm": 1.21875, "learning_rate": 8.731068295200414e-06, "loss": 0.7695, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 232 }, { "epoch": 0.5869017632241813, "grad_norm": 1.1953125, "learning_rate": 8.729255685733977e-06, "loss": 0.7637, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 233 }, { "epoch": 0.5894206549118388, "grad_norm": 1.15625, "learning_rate": 8.727437446074203e-06, "loss": 0.7646, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 234 }, { "epoch": 0.5919395465994962, "grad_norm": 1.125, "learning_rate": 8.72561354994815e-06, "loss": 0.7607, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 235 }, { "epoch": 0.5944584382871536, "grad_norm": 10.6875, "learning_rate": 8.723783970919162e-06, "loss": 0.8115, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 236 }, { "epoch": 0.5969773299748111, "grad_norm": 1.171875, "learning_rate": 8.721948682385575e-06, "loss": 0.7295, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 237 }, { "epoch": 0.5994962216624685, "grad_norm": 1.1953125, "learning_rate": 8.720107657579442e-06, "loss": 0.7764, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 238 }, { "epoch": 0.6020151133501259, "grad_norm": 1.09375, "learning_rate": 8.718260869565218e-06, "loss": 0.8428, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 239 }, { "epoch": 0.6045340050377834, "grad_norm": 1.1796875, "learning_rate": 8.716408291238462e-06, "loss": 0.7832, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 240 }, { "epoch": 0.6070528967254408, "grad_norm": 1.171875, "learning_rate": 8.714549895324495e-06, "loss": 0.7861, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 241 }, { "epoch": 0.6095717884130982, "grad_norm": 1.2109375, "learning_rate": 8.712685654377075e-06, "loss": 0.7676, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 242 }, { "epoch": 0.6120906801007556, "grad_norm": 1.15625, "learning_rate": 8.710815540777039e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 243 }, { "epoch": 0.6146095717884131, "grad_norm": 1.140625, "learning_rate": 8.708939526730937e-06, "loss": 0.7979, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 244 }, { "epoch": 0.6171284634760705, "grad_norm": 1.2421875, "learning_rate": 8.707057584269663e-06, "loss": 0.7959, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 245 }, { "epoch": 0.6196473551637279, "grad_norm": 1.125, "learning_rate": 8.705169685247055e-06, "loss": 0.8115, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 246 }, { "epoch": 0.6221662468513854, "grad_norm": 1.0546875, "learning_rate": 8.7032758013385e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 247 }, { "epoch": 0.6246851385390428, "grad_norm": 1.1953125, "learning_rate": 8.701375904039512e-06, "loss": 0.8301, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 248 }, { "epoch": 0.6272040302267002, "grad_norm": 1.609375, "learning_rate": 8.699469964664311e-06, "loss": 0.7529, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 249 }, { "epoch": 0.6297229219143576, "grad_norm": 1.203125, "learning_rate": 8.697557954344365e-06, "loss": 0.7852, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 250 }, { "epoch": 0.6322418136020151, "grad_norm": 1.1953125, "learning_rate": 8.695639844026941e-06, "loss": 0.8721, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 251 }, { "epoch": 0.6347607052896725, "grad_norm": 1.2421875, "learning_rate": 8.693715604473637e-06, "loss": 0.915, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 252 }, { "epoch": 0.6372795969773299, "grad_norm": 1.1640625, "learning_rate": 8.691785206258892e-06, "loss": 0.752, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 253 }, { "epoch": 0.6397984886649875, "grad_norm": 1.546875, "learning_rate": 8.689848619768477e-06, "loss": 0.8145, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 254 }, { "epoch": 0.6423173803526449, "grad_norm": 1.296875, "learning_rate": 8.687905815198002e-06, "loss": 0.8467, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 255 }, { "epoch": 0.6448362720403022, "grad_norm": 1.328125, "learning_rate": 8.685956762551366e-06, "loss": 0.8008, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 256 }, { "epoch": 0.6473551637279596, "grad_norm": 1.140625, "learning_rate": 8.684001431639228e-06, "loss": 0.7744, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 257 }, { "epoch": 0.6498740554156172, "grad_norm": 1.03125, "learning_rate": 8.682039792077434e-06, "loss": 0.7354, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 258 }, { "epoch": 0.6523929471032746, "grad_norm": 1.1171875, "learning_rate": 8.680071813285458e-06, "loss": 0.833, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 259 }, { "epoch": 0.654911838790932, "grad_norm": 1.1640625, "learning_rate": 8.678097464484805e-06, "loss": 0.7764, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 260 }, { "epoch": 0.6574307304785895, "grad_norm": 1.46875, "learning_rate": 8.676116714697407e-06, "loss": 0.8633, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 261 }, { "epoch": 0.6599496221662469, "grad_norm": 1.09375, "learning_rate": 8.674129532744001e-06, "loss": 0.8398, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 262 }, { "epoch": 0.6624685138539043, "grad_norm": 1.3671875, "learning_rate": 8.672135887242501e-06, "loss": 0.8594, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 263 }, { "epoch": 0.6649874055415617, "grad_norm": 1.234375, "learning_rate": 8.670135746606335e-06, "loss": 0.7314, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 264 }, { "epoch": 0.6675062972292192, "grad_norm": 1.0546875, "learning_rate": 8.668129079042786e-06, "loss": 0.7266, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 265 }, { "epoch": 0.6700251889168766, "grad_norm": 1.2734375, "learning_rate": 8.666115852551298e-06, "loss": 0.9062, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 266 }, { "epoch": 0.672544080604534, "grad_norm": 1.1796875, "learning_rate": 8.66409603492179e-06, "loss": 0.707, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 267 }, { "epoch": 0.6750629722921915, "grad_norm": 1.046875, "learning_rate": 8.66206959373292e-06, "loss": 0.7881, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 268 }, { "epoch": 0.6775818639798489, "grad_norm": 1.1328125, "learning_rate": 8.660036496350365e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 269 }, { "epoch": 0.6801007556675063, "grad_norm": 1.3125, "learning_rate": 8.657996709925059e-06, "loss": 0.8418, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 270 }, { "epoch": 0.6826196473551638, "grad_norm": 1.2421875, "learning_rate": 8.655950201391432e-06, "loss": 0.7588, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 271 }, { "epoch": 0.6851385390428212, "grad_norm": 1.0625, "learning_rate": 8.653896937465615e-06, "loss": 0.7383, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 272 }, { "epoch": 0.6876574307304786, "grad_norm": 1.28125, "learning_rate": 8.651836884643645e-06, "loss": 0.7686, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 273 }, { "epoch": 0.690176322418136, "grad_norm": 1.328125, "learning_rate": 8.649770009199633e-06, "loss": 0.8057, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 274 }, { "epoch": 0.6926952141057935, "grad_norm": 1.28125, "learning_rate": 8.647696277183928e-06, "loss": 0.79, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 275 }, { "epoch": 0.6952141057934509, "grad_norm": 1.1171875, "learning_rate": 8.645615654421265e-06, "loss": 0.8838, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 276 }, { "epoch": 0.6977329974811083, "grad_norm": 1.5703125, "learning_rate": 8.643528106508877e-06, "loss": 0.7314, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 277 }, { "epoch": 0.7002518891687658, "grad_norm": 1.3359375, "learning_rate": 8.641433598814596e-06, "loss": 0.7676, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 278 }, { "epoch": 0.7027707808564232, "grad_norm": 1.2890625, "learning_rate": 8.639332096474953e-06, "loss": 0.8281, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 279 }, { "epoch": 0.7052896725440806, "grad_norm": 1.15625, "learning_rate": 8.637223564393235e-06, "loss": 0.7646, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 280 }, { "epoch": 0.707808564231738, "grad_norm": 1.2265625, "learning_rate": 8.635107967237528e-06, "loss": 0.8105, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 281 }, { "epoch": 0.7103274559193955, "grad_norm": 1.2265625, "learning_rate": 8.632985269438747e-06, "loss": 0.8438, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 282 }, { "epoch": 0.7128463476070529, "grad_norm": 1.109375, "learning_rate": 8.630855435188644e-06, "loss": 0.7891, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 283 }, { "epoch": 0.7153652392947103, "grad_norm": 1.0859375, "learning_rate": 8.628718428437793e-06, "loss": 0.9287, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 284 }, { "epoch": 0.7178841309823678, "grad_norm": 1.125, "learning_rate": 8.626574212893554e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 285 }, { "epoch": 0.7204030226700252, "grad_norm": 1.140625, "learning_rate": 8.624422752018023e-06, "loss": 0.8291, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 286 }, { "epoch": 0.7229219143576826, "grad_norm": 1.296875, "learning_rate": 8.62226400902595e-06, "loss": 0.7832, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 287 }, { "epoch": 0.72544080604534, "grad_norm": 1.6328125, "learning_rate": 8.62009794688265e-06, "loss": 0.8164, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 288 }, { "epoch": 0.7279596977329975, "grad_norm": 1.234375, "learning_rate": 8.617924528301888e-06, "loss": 0.7773, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 289 }, { "epoch": 0.7304785894206549, "grad_norm": 1.171875, "learning_rate": 8.615743715743717e-06, "loss": 0.8291, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 290 }, { "epoch": 0.7329974811083123, "grad_norm": 1.078125, "learning_rate": 8.613555471412343e-06, "loss": 0.7959, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 291 }, { "epoch": 0.7355163727959698, "grad_norm": 1.40625, "learning_rate": 8.611359757253934e-06, "loss": 0.7549, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 292 }, { "epoch": 0.7380352644836272, "grad_norm": 1.0703125, "learning_rate": 8.609156534954409e-06, "loss": 0.7715, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 293 }, { "epoch": 0.7405541561712846, "grad_norm": 1.1015625, "learning_rate": 8.606945765937203e-06, "loss": 0.7861, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 294 }, { "epoch": 0.743073047858942, "grad_norm": 1.3046875, "learning_rate": 8.604727411361037e-06, "loss": 0.8574, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 295 }, { "epoch": 0.7455919395465995, "grad_norm": 1.125, "learning_rate": 8.602501432117624e-06, "loss": 0.75, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 296 }, { "epoch": 0.7481108312342569, "grad_norm": 1.0546875, "learning_rate": 8.600267788829381e-06, "loss": 0.8682, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 297 }, { "epoch": 0.7506297229219143, "grad_norm": 1.078125, "learning_rate": 8.598026441847098e-06, "loss": 0.8125, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 298 }, { "epoch": 0.7531486146095718, "grad_norm": 1.09375, "learning_rate": 8.595777351247601e-06, "loss": 0.8555, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 299 }, { "epoch": 0.7556675062972292, "grad_norm": 1.15625, "learning_rate": 8.593520476831378e-06, "loss": 0.7256, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 300 }, { "epoch": 0.7581863979848866, "grad_norm": 1.0546875, "learning_rate": 8.591255778120185e-06, "loss": 0.8066, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 301 }, { "epoch": 0.760705289672544, "grad_norm": 1.1015625, "learning_rate": 8.58898321435462e-06, "loss": 0.8301, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 302 }, { "epoch": 0.7632241813602015, "grad_norm": 1.1171875, "learning_rate": 8.58670274449169e-06, "loss": 0.7686, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 303 }, { "epoch": 0.7657430730478589, "grad_norm": 1.234375, "learning_rate": 8.584414327202324e-06, "loss": 0.7969, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 304 }, { "epoch": 0.7682619647355163, "grad_norm": 1.171875, "learning_rate": 8.582117920868892e-06, "loss": 0.8574, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 305 }, { "epoch": 0.7707808564231738, "grad_norm": 1.0703125, "learning_rate": 8.57981348358267e-06, "loss": 0.7412, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 306 }, { "epoch": 0.7732997481108312, "grad_norm": 1.265625, "learning_rate": 8.5775009731413e-06, "loss": 0.9023, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 307 }, { "epoch": 0.7758186397984886, "grad_norm": 1.21875, "learning_rate": 8.575180347046208e-06, "loss": 0.8477, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 308 }, { "epoch": 0.7783375314861462, "grad_norm": 1.1015625, "learning_rate": 8.5728515625e-06, "loss": 0.8184, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 309 }, { "epoch": 0.7808564231738035, "grad_norm": 1.0078125, "learning_rate": 8.570514576403836e-06, "loss": 0.7373, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 310 }, { "epoch": 0.783375314861461, "grad_norm": 1.1875, "learning_rate": 8.568169345354762e-06, "loss": 0.7637, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 311 }, { "epoch": 0.7858942065491183, "grad_norm": 1.1328125, "learning_rate": 8.565815825643039e-06, "loss": 0.7979, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 312 }, { "epoch": 0.7884130982367759, "grad_norm": 1.3203125, "learning_rate": 8.56345397324941e-06, "loss": 0.8418, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 313 }, { "epoch": 0.7909319899244333, "grad_norm": 1.1796875, "learning_rate": 8.561083743842366e-06, "loss": 0.8135, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 314 }, { "epoch": 0.7934508816120907, "grad_norm": 1.1953125, "learning_rate": 8.558705092775365e-06, "loss": 0.8242, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 315 }, { "epoch": 0.7959697732997482, "grad_norm": 1.34375, "learning_rate": 8.556317975084042e-06, "loss": 0.6963, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 316 }, { "epoch": 0.7984886649874056, "grad_norm": 1.453125, "learning_rate": 8.55392234548336e-06, "loss": 0.8691, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 317 }, { "epoch": 0.801007556675063, "grad_norm": 1.078125, "learning_rate": 8.551518158364756e-06, "loss": 0.7109, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 318 }, { "epoch": 0.8035264483627204, "grad_norm": 1.15625, "learning_rate": 8.54910536779324e-06, "loss": 0.7812, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 319 }, { "epoch": 0.8060453400503779, "grad_norm": 1.1171875, "learning_rate": 8.546683927504481e-06, "loss": 0.7041, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 320 }, { "epoch": 0.8085642317380353, "grad_norm": 1.2109375, "learning_rate": 8.544253790901836e-06, "loss": 0.8018, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 321 }, { "epoch": 0.8110831234256927, "grad_norm": 1.0703125, "learning_rate": 8.54181491105337e-06, "loss": 0.7627, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 322 }, { "epoch": 0.8136020151133502, "grad_norm": 1.171875, "learning_rate": 8.539367240688826e-06, "loss": 0.75, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 323 }, { "epoch": 0.8161209068010076, "grad_norm": 1.15625, "learning_rate": 8.536910732196591e-06, "loss": 0.7305, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 324 }, { "epoch": 0.818639798488665, "grad_norm": 1.0703125, "learning_rate": 8.534445337620579e-06, "loss": 0.7949, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 325 }, { "epoch": 0.8211586901763224, "grad_norm": 1.1171875, "learning_rate": 8.531971008657139e-06, "loss": 0.79, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 326 }, { "epoch": 0.8236775818639799, "grad_norm": 1.0703125, "learning_rate": 8.529487696651876e-06, "loss": 0.7666, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 327 }, { "epoch": 0.8261964735516373, "grad_norm": 1.0078125, "learning_rate": 8.526995352596485e-06, "loss": 0.6963, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 328 }, { "epoch": 0.8287153652392947, "grad_norm": 1.0390625, "learning_rate": 8.524493927125505e-06, "loss": 0.7451, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 329 }, { "epoch": 0.8312342569269522, "grad_norm": 1.078125, "learning_rate": 8.521983370513081e-06, "loss": 0.7168, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 330 }, { "epoch": 0.8337531486146096, "grad_norm": 1.5390625, "learning_rate": 8.519463632669648e-06, "loss": 0.7754, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 331 }, { "epoch": 0.836272040302267, "grad_norm": 1.265625, "learning_rate": 8.51693466313861e-06, "loss": 0.8301, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 332 }, { "epoch": 0.8387909319899244, "grad_norm": 1.3046875, "learning_rate": 8.514396411092986e-06, "loss": 0.7988, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 333 }, { "epoch": 0.8413098236775819, "grad_norm": 3.15625, "learning_rate": 8.511848825331972e-06, "loss": 0.7119, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 334 }, { "epoch": 0.8438287153652393, "grad_norm": 1.1171875, "learning_rate": 8.509291854277527e-06, "loss": 0.7715, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 335 }, { "epoch": 0.8463476070528967, "grad_norm": 2.578125, "learning_rate": 8.506725445970883e-06, "loss": 0.7773, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 336 }, { "epoch": 0.8488664987405542, "grad_norm": 1.21875, "learning_rate": 8.504149548069022e-06, "loss": 0.7607, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 337 }, { "epoch": 0.8513853904282116, "grad_norm": 1.2109375, "learning_rate": 8.501564107841121e-06, "loss": 0.752, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 338 }, { "epoch": 0.853904282115869, "grad_norm": 1.1328125, "learning_rate": 8.498969072164949e-06, "loss": 0.7812, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 339 }, { "epoch": 0.8564231738035264, "grad_norm": 1.1171875, "learning_rate": 8.496364387523239e-06, "loss": 0.8232, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 340 }, { "epoch": 0.8589420654911839, "grad_norm": 1.234375, "learning_rate": 8.49375e-06, "loss": 0.8281, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 341 }, { "epoch": 0.8614609571788413, "grad_norm": 1.2734375, "learning_rate": 8.4911258552768e-06, "loss": 0.9336, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 342 }, { "epoch": 0.8639798488664987, "grad_norm": 1.4921875, "learning_rate": 8.488491898629e-06, "loss": 0.7705, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 343 }, { "epoch": 0.8664987405541562, "grad_norm": 1.359375, "learning_rate": 8.485848074921957e-06, "loss": 0.8467, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 344 }, { "epoch": 0.8690176322418136, "grad_norm": 1.1171875, "learning_rate": 8.483194328607173e-06, "loss": 0.7803, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 345 }, { "epoch": 0.871536523929471, "grad_norm": 1.1953125, "learning_rate": 8.480530603718405e-06, "loss": 0.792, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 346 }, { "epoch": 0.8740554156171285, "grad_norm": 1.1328125, "learning_rate": 8.477856843867728e-06, "loss": 0.832, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 347 }, { "epoch": 0.8765743073047859, "grad_norm": 1.0078125, "learning_rate": 8.47517299224156e-06, "loss": 0.7949, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 348 }, { "epoch": 0.8790931989924433, "grad_norm": 1.078125, "learning_rate": 8.472478991596638e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 349 }, { "epoch": 0.8816120906801007, "grad_norm": 1.09375, "learning_rate": 8.469774784255946e-06, "loss": 0.7158, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 350 }, { "epoch": 0.8841309823677582, "grad_norm": 1.3125, "learning_rate": 8.467060312104597e-06, "loss": 0.7334, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 351 }, { "epoch": 0.8866498740554156, "grad_norm": 1.1484375, "learning_rate": 8.464335516585674e-06, "loss": 0.8545, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 352 }, { "epoch": 0.889168765743073, "grad_norm": 1.21875, "learning_rate": 8.46160033869602e-06, "loss": 0.748, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 353 }, { "epoch": 0.8916876574307305, "grad_norm": 1.265625, "learning_rate": 8.458854718981973e-06, "loss": 0.7686, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 354 }, { "epoch": 0.8942065491183879, "grad_norm": 1.25, "learning_rate": 8.456098597535062e-06, "loss": 0.8613, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 355 }, { "epoch": 0.8967254408060453, "grad_norm": 1.1484375, "learning_rate": 8.453331913987651e-06, "loss": 0.7715, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 356 }, { "epoch": 0.8992443324937027, "grad_norm": 1.1328125, "learning_rate": 8.450554607508531e-06, "loss": 0.7793, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 357 }, { "epoch": 0.9017632241813602, "grad_norm": 1.15625, "learning_rate": 8.447766616798463e-06, "loss": 0.8506, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 358 }, { "epoch": 0.9042821158690176, "grad_norm": 0.984375, "learning_rate": 8.444967880085653e-06, "loss": 0.7002, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 359 }, { "epoch": 0.906801007556675, "grad_norm": 1.0625, "learning_rate": 8.442158335121218e-06, "loss": 0.8105, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 360 }, { "epoch": 0.9093198992443325, "grad_norm": 1.1953125, "learning_rate": 8.439337919174549e-06, "loss": 0.751, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 361 }, { "epoch": 0.9118387909319899, "grad_norm": 1.171875, "learning_rate": 8.436506569028646e-06, "loss": 0.8604, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 362 }, { "epoch": 0.9143576826196473, "grad_norm": 1.3828125, "learning_rate": 8.4336642209754e-06, "loss": 0.8877, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 363 }, { "epoch": 0.9168765743073047, "grad_norm": 1.046875, "learning_rate": 8.430810810810811e-06, "loss": 0.79, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 364 }, { "epoch": 0.9193954659949622, "grad_norm": 1.0703125, "learning_rate": 8.427946273830157e-06, "loss": 0.7539, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 365 }, { "epoch": 0.9219143576826196, "grad_norm": 1.21875, "learning_rate": 8.425070544823096e-06, "loss": 0.7451, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 366 }, { "epoch": 0.924433249370277, "grad_norm": 1.0625, "learning_rate": 8.422183558068725e-06, "loss": 0.8232, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 367 }, { "epoch": 0.9269521410579346, "grad_norm": 1.234375, "learning_rate": 8.419285247330573e-06, "loss": 0.7607, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 368 }, { "epoch": 0.929471032745592, "grad_norm": 1.1484375, "learning_rate": 8.416375545851529e-06, "loss": 0.8428, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 369 }, { "epoch": 0.9319899244332494, "grad_norm": 1.0546875, "learning_rate": 8.413454386348721e-06, "loss": 0.7803, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 370 }, { "epoch": 0.9345088161209067, "grad_norm": 1.0234375, "learning_rate": 8.41052170100833e-06, "loss": 0.7412, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 371 }, { "epoch": 0.9370277078085643, "grad_norm": 1.046875, "learning_rate": 8.407577421480343e-06, "loss": 0.9385, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 372 }, { "epoch": 0.9395465994962217, "grad_norm": 1.1328125, "learning_rate": 8.40462147887324e-06, "loss": 0.7285, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 373 }, { "epoch": 0.9420654911838791, "grad_norm": 1.1015625, "learning_rate": 8.401653803748621e-06, "loss": 0.7832, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 374 }, { "epoch": 0.9445843828715366, "grad_norm": 1.0703125, "learning_rate": 8.398674326115776e-06, "loss": 0.7686, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 375 }, { "epoch": 0.947103274559194, "grad_norm": 1.21875, "learning_rate": 8.395682975426168e-06, "loss": 0.7559, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 376 }, { "epoch": 0.9496221662468514, "grad_norm": 1.1953125, "learning_rate": 8.392679680567879e-06, "loss": 0.793, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 377 }, { "epoch": 0.9521410579345088, "grad_norm": 1.1328125, "learning_rate": 8.389664369859969e-06, "loss": 0.7383, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 378 }, { "epoch": 0.9546599496221663, "grad_norm": 5.4375, "learning_rate": 8.38663697104677e-06, "loss": 0.7324, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 379 }, { "epoch": 0.9571788413098237, "grad_norm": 1.140625, "learning_rate": 8.383597411292124e-06, "loss": 0.7949, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 380 }, { "epoch": 0.9596977329974811, "grad_norm": 1.109375, "learning_rate": 8.380545617173523e-06, "loss": 0.7471, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 381 }, { "epoch": 0.9622166246851386, "grad_norm": 1.15625, "learning_rate": 8.377481514676227e-06, "loss": 0.8252, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 382 }, { "epoch": 0.964735516372796, "grad_norm": 1.0859375, "learning_rate": 8.374405029187249e-06, "loss": 0.7275, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 383 }, { "epoch": 0.9672544080604534, "grad_norm": 1.3515625, "learning_rate": 8.371316085489314e-06, "loss": 0.8535, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 384 }, { "epoch": 0.9697732997481109, "grad_norm": 1.0078125, "learning_rate": 8.368214607754734e-06, "loss": 0.7021, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 385 }, { "epoch": 0.9722921914357683, "grad_norm": 1.046875, "learning_rate": 8.365100519539192e-06, "loss": 0.7461, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 386 }, { "epoch": 0.9748110831234257, "grad_norm": 1.15625, "learning_rate": 8.361973743775462e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 387 }, { "epoch": 0.9773299748110831, "grad_norm": 1.4609375, "learning_rate": 8.358834202767068e-06, "loss": 0.7588, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 388 }, { "epoch": 0.9798488664987406, "grad_norm": 1.0703125, "learning_rate": 8.355681818181818e-06, "loss": 0.7598, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 389 }, { "epoch": 0.982367758186398, "grad_norm": 1.203125, "learning_rate": 8.35251651104532e-06, "loss": 0.7852, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 390 }, { "epoch": 0.9848866498740554, "grad_norm": 1.296875, "learning_rate": 8.349338201734368e-06, "loss": 0.7285, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 391 }, { "epoch": 0.9874055415617129, "grad_norm": 1.0234375, "learning_rate": 8.346146809970272e-06, "loss": 0.8027, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 392 }, { "epoch": 0.9899244332493703, "grad_norm": 1.125, "learning_rate": 8.342942254812099e-06, "loss": 0.749, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 393 }, { "epoch": 0.9924433249370277, "grad_norm": 1.109375, "learning_rate": 8.339724454649827e-06, "loss": 0.7881, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 394 }, { "epoch": 0.9949622166246851, "grad_norm": 1.71875, "learning_rate": 8.336493327197423e-06, "loss": 0.7803, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 395 }, { "epoch": 0.9974811083123426, "grad_norm": 1.1953125, "learning_rate": 8.33324878948582e-06, "loss": 0.8389, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 396 }, { "epoch": 1.0, "grad_norm": 1.203125, "learning_rate": 8.329990757855822e-06, "loss": 0.8027, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 397 }, { "epoch": 1.0025188916876575, "grad_norm": 1.21875, "learning_rate": 8.326719147950914e-06, "loss": 0.7734, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 398 }, { "epoch": 1.0050377833753148, "grad_norm": 1.1171875, "learning_rate": 8.323433874709977e-06, "loss": 0.752, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 399 }, { "epoch": 1.0075566750629723, "grad_norm": 1.2578125, "learning_rate": 8.320134852359917e-06, "loss": 0.7939, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 400 }, { "epoch": 1.0100755667506298, "grad_norm": 1.234375, "learning_rate": 8.316821994408202e-06, "loss": 0.833, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 401 }, { "epoch": 1.012594458438287, "grad_norm": 1.203125, "learning_rate": 8.313495213635301e-06, "loss": 0.6582, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 402 }, { "epoch": 1.0151133501259446, "grad_norm": 1.03125, "learning_rate": 8.310154422087037e-06, "loss": 0.752, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 403 }, { "epoch": 1.0176322418136021, "grad_norm": 1.03125, "learning_rate": 8.306799531066822e-06, "loss": 0.7354, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 404 }, { "epoch": 1.0201511335012594, "grad_norm": 1.046875, "learning_rate": 8.30343045112782e-06, "loss": 0.749, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 405 }, { "epoch": 1.022670025188917, "grad_norm": 1.1796875, "learning_rate": 8.300047092064987e-06, "loss": 0.7031, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 406 }, { "epoch": 1.0251889168765742, "grad_norm": 1.421875, "learning_rate": 8.296649362907031e-06, "loss": 0.6973, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 407 }, { "epoch": 1.0277078085642317, "grad_norm": 1.2109375, "learning_rate": 8.293237171908252e-06, "loss": 0.6943, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 408 }, { "epoch": 1.0302267002518892, "grad_norm": 1.0390625, "learning_rate": 8.289810426540285e-06, "loss": 0.6475, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 409 }, { "epoch": 1.0327455919395465, "grad_norm": 1.125, "learning_rate": 8.286369033483732e-06, "loss": 0.7861, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 410 }, { "epoch": 1.035264483627204, "grad_norm": 1.0859375, "learning_rate": 8.282912898619704e-06, "loss": 0.7939, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 411 }, { "epoch": 1.0377833753148615, "grad_norm": 1.0546875, "learning_rate": 8.279441927021226e-06, "loss": 0.6094, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 412 }, { "epoch": 1.0403022670025188, "grad_norm": 1.1484375, "learning_rate": 8.275956022944552e-06, "loss": 0.6543, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 413 }, { "epoch": 1.0428211586901763, "grad_norm": 1.140625, "learning_rate": 8.272455089820359e-06, "loss": 0.7412, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 414 }, { "epoch": 1.0453400503778338, "grad_norm": 1.28125, "learning_rate": 8.268939030244837e-06, "loss": 0.7412, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 415 }, { "epoch": 1.0478589420654911, "grad_norm": 1.3046875, "learning_rate": 8.265407745970652e-06, "loss": 0.7227, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 416 }, { "epoch": 1.0503778337531486, "grad_norm": 1.6328125, "learning_rate": 8.261861137897782e-06, "loss": 0.7412, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 417 }, { "epoch": 1.0528967254408061, "grad_norm": 1.0546875, "learning_rate": 8.258299106064267e-06, "loss": 0.6191, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 418 }, { "epoch": 1.0554156171284634, "grad_norm": 1.03125, "learning_rate": 8.254721549636805e-06, "loss": 0.6074, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 419 }, { "epoch": 1.057934508816121, "grad_norm": 1.109375, "learning_rate": 8.251128366901237e-06, "loss": 0.6807, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 420 }, { "epoch": 1.0604534005037782, "grad_norm": 1.546875, "learning_rate": 8.247519455252918e-06, "loss": 0.7637, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 421 }, { "epoch": 1.0629722921914357, "grad_norm": 1.0703125, "learning_rate": 8.243894711186936e-06, "loss": 0.6602, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 422 }, { "epoch": 1.0654911838790933, "grad_norm": 1.1484375, "learning_rate": 8.240254030288227e-06, "loss": 0.54, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 423 }, { "epoch": 1.0680100755667505, "grad_norm": 1.0703125, "learning_rate": 8.236597307221542e-06, "loss": 0.6533, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 424 }, { "epoch": 1.070528967254408, "grad_norm": 1.2578125, "learning_rate": 8.232924435721296e-06, "loss": 0.6001, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 425 }, { "epoch": 1.0730478589420656, "grad_norm": 1.1171875, "learning_rate": 8.229235308581265e-06, "loss": 0.6147, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 426 }, { "epoch": 1.0755667506297228, "grad_norm": 1.4765625, "learning_rate": 8.22552981764416e-06, "loss": 0.7305, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 427 }, { "epoch": 1.0780856423173804, "grad_norm": 1.2265625, "learning_rate": 8.22180785379106e-06, "loss": 0.5825, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 428 }, { "epoch": 1.0806045340050379, "grad_norm": 1.25, "learning_rate": 8.218069306930693e-06, "loss": 0.6865, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 429 }, { "epoch": 1.0831234256926952, "grad_norm": 1.203125, "learning_rate": 8.214314065988588e-06, "loss": 0.6436, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 430 }, { "epoch": 1.0856423173803527, "grad_norm": 1.2421875, "learning_rate": 8.210542018896072e-06, "loss": 0.6865, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 431 }, { "epoch": 1.0881612090680102, "grad_norm": 1.2265625, "learning_rate": 8.206753052579118e-06, "loss": 0.5557, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 432 }, { "epoch": 1.0906801007556675, "grad_norm": 1.2734375, "learning_rate": 8.202947052947053e-06, "loss": 0.5801, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 433 }, { "epoch": 1.093198992443325, "grad_norm": 1.359375, "learning_rate": 8.1991239048811e-06, "loss": 0.606, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 434 }, { "epoch": 1.0957178841309823, "grad_norm": 1.3125, "learning_rate": 8.19528349222278e-06, "loss": 0.6387, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 435 }, { "epoch": 1.0982367758186398, "grad_norm": 1.0703125, "learning_rate": 8.191425697762133e-06, "loss": 0.6489, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 436 }, { "epoch": 1.1007556675062973, "grad_norm": 1.0078125, "learning_rate": 8.187550403225805e-06, "loss": 0.5254, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 437 }, { "epoch": 1.1032745591939546, "grad_norm": 1.140625, "learning_rate": 8.183657489264967e-06, "loss": 0.5967, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 438 }, { "epoch": 1.105793450881612, "grad_norm": 1.484375, "learning_rate": 8.179746835443038e-06, "loss": 0.7021, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 439 }, { "epoch": 1.1083123425692696, "grad_norm": 1.3359375, "learning_rate": 8.175818320223294e-06, "loss": 0.665, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 440 }, { "epoch": 1.1108312342569269, "grad_norm": 1.265625, "learning_rate": 8.171871820956256e-06, "loss": 0.7402, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 441 }, { "epoch": 1.1133501259445844, "grad_norm": 1.0859375, "learning_rate": 8.16790721386694e-06, "loss": 0.6133, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 442 }, { "epoch": 1.1158690176322419, "grad_norm": 1.125, "learning_rate": 8.1639243740419e-06, "loss": 0.5957, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 443 }, { "epoch": 1.1183879093198992, "grad_norm": 1.0859375, "learning_rate": 8.159923175416133e-06, "loss": 0.5791, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 444 }, { "epoch": 1.1209068010075567, "grad_norm": 1.4453125, "learning_rate": 8.155903490759754e-06, "loss": 0.6162, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 445 }, { "epoch": 1.1234256926952142, "grad_norm": 1.125, "learning_rate": 8.151865191664523e-06, "loss": 0.6318, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 446 }, { "epoch": 1.1259445843828715, "grad_norm": 1.1796875, "learning_rate": 8.147808148530169e-06, "loss": 0.7324, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 447 }, { "epoch": 1.128463476070529, "grad_norm": 1.1328125, "learning_rate": 8.14373223055053e-06, "loss": 0.6162, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 448 }, { "epoch": 1.1309823677581865, "grad_norm": 1.296875, "learning_rate": 8.139637305699483e-06, "loss": 0.5923, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 449 }, { "epoch": 1.1335012594458438, "grad_norm": 1.1953125, "learning_rate": 8.135523240716697e-06, "loss": 0.7002, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 450 }, { "epoch": 1.1360201511335013, "grad_norm": 1.078125, "learning_rate": 8.13138990109318e-06, "loss": 0.5576, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 451 }, { "epoch": 1.1385390428211588, "grad_norm": 1.1015625, "learning_rate": 8.127237151056614e-06, "loss": 0.5786, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 452 }, { "epoch": 1.141057934508816, "grad_norm": 1.0703125, "learning_rate": 8.123064853556487e-06, "loss": 0.6289, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 453 }, { "epoch": 1.1435768261964736, "grad_norm": 1.109375, "learning_rate": 8.118872870249019e-06, "loss": 0.6338, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 454 }, { "epoch": 1.146095717884131, "grad_norm": 1.1875, "learning_rate": 8.114661061481872e-06, "loss": 0.5381, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 455 }, { "epoch": 1.1486146095717884, "grad_norm": 1.140625, "learning_rate": 8.11042928627864e-06, "loss": 0.5938, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 456 }, { "epoch": 1.151133501259446, "grad_norm": 1.125, "learning_rate": 8.106177402323124e-06, "loss": 0.627, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 457 }, { "epoch": 1.1536523929471032, "grad_norm": 1.1171875, "learning_rate": 8.101905265943371e-06, "loss": 0.6172, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 458 }, { "epoch": 1.1561712846347607, "grad_norm": 1.3203125, "learning_rate": 8.09761273209549e-06, "loss": 0.6523, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 459 }, { "epoch": 1.1586901763224182, "grad_norm": 1.0546875, "learning_rate": 8.093299654347248e-06, "loss": 0.582, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 460 }, { "epoch": 1.1612090680100755, "grad_norm": 1.171875, "learning_rate": 8.088965884861407e-06, "loss": 0.543, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 461 }, { "epoch": 1.163727959697733, "grad_norm": 1.2578125, "learning_rate": 8.084611274378841e-06, "loss": 0.6973, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 462 }, { "epoch": 1.1662468513853903, "grad_norm": 1.109375, "learning_rate": 8.080235672201393e-06, "loss": 0.6318, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 463 }, { "epoch": 1.1687657430730478, "grad_norm": 1.1796875, "learning_rate": 8.075838926174495e-06, "loss": 0.623, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 464 }, { "epoch": 1.1712846347607053, "grad_norm": 1.0703125, "learning_rate": 8.071420882669536e-06, "loss": 0.6089, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 465 }, { "epoch": 1.1738035264483626, "grad_norm": 1.2421875, "learning_rate": 8.066981386565956e-06, "loss": 0.6279, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 466 }, { "epoch": 1.1763224181360201, "grad_norm": 1.171875, "learning_rate": 8.0625202812331e-06, "loss": 0.6333, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 467 }, { "epoch": 1.1788413098236776, "grad_norm": 1.078125, "learning_rate": 8.058037408511792e-06, "loss": 0.583, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 468 }, { "epoch": 1.181360201511335, "grad_norm": 1.140625, "learning_rate": 8.053532608695653e-06, "loss": 0.6504, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 469 }, { "epoch": 1.1838790931989924, "grad_norm": 1.203125, "learning_rate": 8.049005720512121e-06, "loss": 0.5947, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 470 }, { "epoch": 1.18639798488665, "grad_norm": 1.2109375, "learning_rate": 8.044456581103222e-06, "loss": 0.5234, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 471 }, { "epoch": 1.1889168765743072, "grad_norm": 1.046875, "learning_rate": 8.039885026006022e-06, "loss": 0.6035, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 472 }, { "epoch": 1.1914357682619647, "grad_norm": 1.359375, "learning_rate": 8.035290889132821e-06, "loss": 0.6455, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 473 }, { "epoch": 1.1939546599496222, "grad_norm": 1.1015625, "learning_rate": 8.03067400275103e-06, "loss": 0.541, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 474 }, { "epoch": 1.1964735516372795, "grad_norm": 1.09375, "learning_rate": 8.026034197462769e-06, "loss": 0.604, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 475 }, { "epoch": 1.198992443324937, "grad_norm": 1.1015625, "learning_rate": 8.02137130218413e-06, "loss": 0.6338, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 476 }, { "epoch": 1.2015113350125946, "grad_norm": 1.3203125, "learning_rate": 8.016685144124168e-06, "loss": 0.5977, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 477 }, { "epoch": 1.2040302267002518, "grad_norm": 1.3828125, "learning_rate": 8.011975548763545e-06, "loss": 0.6777, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 478 }, { "epoch": 1.2065491183879093, "grad_norm": 1.2890625, "learning_rate": 8.00724233983287e-06, "loss": 0.7031, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 479 }, { "epoch": 1.2090680100755669, "grad_norm": 1.1484375, "learning_rate": 8.002485339290701e-06, "loss": 0.5928, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 480 }, { "epoch": 1.2115869017632241, "grad_norm": 1.03125, "learning_rate": 7.997704367301232e-06, "loss": 0.5332, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 481 }, { "epoch": 1.2141057934508817, "grad_norm": 1.140625, "learning_rate": 7.992899242211619e-06, "loss": 0.6152, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 482 }, { "epoch": 1.2166246851385392, "grad_norm": 1.09375, "learning_rate": 7.988069780528983e-06, "loss": 0.6108, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 483 }, { "epoch": 1.2191435768261965, "grad_norm": 1.03125, "learning_rate": 7.983215796897038e-06, "loss": 0.5781, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 484 }, { "epoch": 1.221662468513854, "grad_norm": 1.0859375, "learning_rate": 7.978337104072398e-06, "loss": 0.5532, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 485 }, { "epoch": 1.2241813602015112, "grad_norm": 1.0859375, "learning_rate": 7.973433512900482e-06, "loss": 0.6104, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 486 }, { "epoch": 1.2267002518891688, "grad_norm": 1.0859375, "learning_rate": 7.968504832291075e-06, "loss": 0.563, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 487 }, { "epoch": 1.2292191435768263, "grad_norm": 1.1171875, "learning_rate": 7.963550869193502e-06, "loss": 0.626, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 488 }, { "epoch": 1.2317380352644836, "grad_norm": 1.28125, "learning_rate": 7.958571428571428e-06, "loss": 0.7334, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 489 }, { "epoch": 1.234256926952141, "grad_norm": 1.0625, "learning_rate": 7.953566313377257e-06, "loss": 0.6025, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 490 }, { "epoch": 1.2367758186397986, "grad_norm": 1.1328125, "learning_rate": 7.948535324526135e-06, "loss": 0.5312, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 491 }, { "epoch": 1.2392947103274559, "grad_norm": 1.03125, "learning_rate": 7.943478260869564e-06, "loss": 0.5693, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 492 }, { "epoch": 1.2418136020151134, "grad_norm": 1.03125, "learning_rate": 7.938394919168591e-06, "loss": 0.625, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 493 }, { "epoch": 1.2443324937027707, "grad_norm": 1.1171875, "learning_rate": 7.93328509406657e-06, "loss": 0.5771, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 494 }, { "epoch": 1.2468513853904282, "grad_norm": 1.3046875, "learning_rate": 7.92814857806152e-06, "loss": 0.627, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 495 }, { "epoch": 1.2493702770780857, "grad_norm": 1.046875, "learning_rate": 7.922985161478033e-06, "loss": 0.6416, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 496 }, { "epoch": 1.251889168765743, "grad_norm": 1.2265625, "learning_rate": 7.91779463243874e-06, "loss": 0.6465, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 497 }, { "epoch": 1.2544080604534005, "grad_norm": 1.0859375, "learning_rate": 7.912576776835331e-06, "loss": 0.5571, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 498 }, { "epoch": 1.256926952141058, "grad_norm": 1.3046875, "learning_rate": 7.90733137829912e-06, "loss": 0.5479, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 499 }, { "epoch": 1.2594458438287153, "grad_norm": 1.09375, "learning_rate": 7.902058218171127e-06, "loss": 0.5889, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 500 }, { "epoch": 1.2619647355163728, "grad_norm": 1.171875, "learning_rate": 7.896757075471699e-06, "loss": 0.6406, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 501 }, { "epoch": 1.2644836272040303, "grad_norm": 1.1171875, "learning_rate": 7.89142772686964e-06, "loss": 0.5518, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 502 }, { "epoch": 1.2670025188916876, "grad_norm": 1.3203125, "learning_rate": 7.886069946650858e-06, "loss": 0.6626, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 503 }, { "epoch": 1.269521410579345, "grad_norm": 1.7890625, "learning_rate": 7.880683506686479e-06, "loss": 0.6904, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 504 }, { "epoch": 1.2720403022670026, "grad_norm": 1.125, "learning_rate": 7.875268176400478e-06, "loss": 0.5835, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 505 }, { "epoch": 1.2745591939546599, "grad_norm": 1.0703125, "learning_rate": 7.86982372273678e-06, "loss": 0.5952, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 506 }, { "epoch": 1.2770780856423174, "grad_norm": 1.1328125, "learning_rate": 7.864349910125824e-06, "loss": 0.6738, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 507 }, { "epoch": 1.279596977329975, "grad_norm": 1.046875, "learning_rate": 7.858846500450585e-06, "loss": 0.6309, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 508 }, { "epoch": 1.2821158690176322, "grad_norm": 1.40625, "learning_rate": 7.853313253012047e-06, "loss": 0.5762, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 509 }, { "epoch": 1.2846347607052897, "grad_norm": 1.1328125, "learning_rate": 7.84774992449411e-06, "loss": 0.5483, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 510 }, { "epoch": 1.2871536523929472, "grad_norm": 1.34375, "learning_rate": 7.842156268927923e-06, "loss": 0.6191, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 511 }, { "epoch": 1.2896725440806045, "grad_norm": 1.109375, "learning_rate": 7.836532037655633e-06, "loss": 0.5967, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 512 }, { "epoch": 1.292191435768262, "grad_norm": 1.0390625, "learning_rate": 7.830876979293545e-06, "loss": 0.5459, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 513 }, { "epoch": 1.2947103274559195, "grad_norm": 1.0234375, "learning_rate": 7.825190839694657e-06, "loss": 0.519, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 514 }, { "epoch": 1.2972292191435768, "grad_norm": 0.96875, "learning_rate": 7.819473361910595e-06, "loss": 0.585, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 515 }, { "epoch": 1.2997481108312343, "grad_norm": 1.75, "learning_rate": 7.813724286152902e-06, "loss": 0.6367, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 516 }, { "epoch": 1.3022670025188918, "grad_norm": 1.6640625, "learning_rate": 7.807943349753695e-06, "loss": 0.7354, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 517 }, { "epoch": 1.3047858942065491, "grad_norm": 1.46875, "learning_rate": 7.802130287125656e-06, "loss": 0.7051, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 518 }, { "epoch": 1.3073047858942066, "grad_norm": 1.1015625, "learning_rate": 7.796284829721362e-06, "loss": 0.5781, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 519 }, { "epoch": 1.309823677581864, "grad_norm": 1.3359375, "learning_rate": 7.790406705991928e-06, "loss": 0.5781, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 520 }, { "epoch": 1.3123425692695214, "grad_norm": 1.2578125, "learning_rate": 7.784495641344955e-06, "loss": 0.5898, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 521 }, { "epoch": 1.3148614609571787, "grad_norm": 1.3359375, "learning_rate": 7.778551358101779e-06, "loss": 0.541, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 522 }, { "epoch": 1.3173803526448362, "grad_norm": 1.140625, "learning_rate": 7.772573575453976e-06, "loss": 0.5757, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 523 }, { "epoch": 1.3198992443324937, "grad_norm": 1.0859375, "learning_rate": 7.766562009419153e-06, "loss": 0.5635, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 524 }, { "epoch": 1.322418136020151, "grad_norm": 1.3984375, "learning_rate": 7.760516372795968e-06, "loss": 0.5361, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 525 }, { "epoch": 1.3249370277078085, "grad_norm": 1.2578125, "learning_rate": 7.754436375118409e-06, "loss": 0.6465, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 526 }, { "epoch": 1.327455919395466, "grad_norm": 1.2578125, "learning_rate": 7.748321722609246e-06, "loss": 0.5176, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 527 }, { "epoch": 1.3299748110831233, "grad_norm": 1.1015625, "learning_rate": 7.742172118132741e-06, "loss": 0.5283, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 528 }, { "epoch": 1.3324937027707808, "grad_norm": 1.1953125, "learning_rate": 7.735987261146496e-06, "loss": 0.6826, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 529 }, { "epoch": 1.3350125944584383, "grad_norm": 1.2265625, "learning_rate": 7.729766847652507e-06, "loss": 0.5171, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 530 }, { "epoch": 1.3375314861460956, "grad_norm": 1.296875, "learning_rate": 7.723510570147342e-06, "loss": 0.6035, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 531 }, { "epoch": 1.3400503778337531, "grad_norm": 1.1875, "learning_rate": 7.717218117571474e-06, "loss": 0.5654, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 532 }, { "epoch": 1.3425692695214106, "grad_norm": 1.390625, "learning_rate": 7.710889175257731e-06, "loss": 0.6338, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 533 }, { "epoch": 1.345088161209068, "grad_norm": 1.2421875, "learning_rate": 7.704523424878836e-06, "loss": 0.626, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 534 }, { "epoch": 1.3476070528967254, "grad_norm": 1.0859375, "learning_rate": 7.698120544394038e-06, "loss": 0.6177, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 535 }, { "epoch": 1.350125944584383, "grad_norm": 1.1484375, "learning_rate": 7.6916802079948e-06, "loss": 0.5503, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 536 }, { "epoch": 1.3526448362720402, "grad_norm": 1.734375, "learning_rate": 7.685202086049545e-06, "loss": 0.7041, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 537 }, { "epoch": 1.3551637279596978, "grad_norm": 1.0703125, "learning_rate": 7.678685845047401e-06, "loss": 0.6699, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 538 }, { "epoch": 1.3576826196473553, "grad_norm": 1.0390625, "learning_rate": 7.672131147540983e-06, "loss": 0.5859, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 539 }, { "epoch": 1.3602015113350125, "grad_norm": 1.1171875, "learning_rate": 7.665537652088127e-06, "loss": 0.5518, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 540 }, { "epoch": 1.36272040302267, "grad_norm": 1.1328125, "learning_rate": 7.658905013192612e-06, "loss": 0.5264, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 541 }, { "epoch": 1.3652392947103276, "grad_norm": 1.046875, "learning_rate": 7.652232881243798e-06, "loss": 0.5415, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 542 }, { "epoch": 1.3677581863979849, "grad_norm": 1.046875, "learning_rate": 7.645520902455209e-06, "loss": 0.6045, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 543 }, { "epoch": 1.3702770780856424, "grad_norm": 1.09375, "learning_rate": 7.638768718801996e-06, "loss": 0.6045, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 544 }, { "epoch": 1.3727959697732999, "grad_norm": 1.109375, "learning_rate": 7.631975967957276e-06, "loss": 0.6113, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 545 }, { "epoch": 1.3753148614609572, "grad_norm": 1.1171875, "learning_rate": 7.6251422832273185e-06, "loss": 0.5947, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 546 }, { "epoch": 1.3778337531486147, "grad_norm": 1.8828125, "learning_rate": 7.61826729348556e-06, "loss": 0.5356, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 547 }, { "epoch": 1.380352644836272, "grad_norm": 1.2109375, "learning_rate": 7.611350623105423e-06, "loss": 0.667, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 548 }, { "epoch": 1.3828715365239295, "grad_norm": 1.1015625, "learning_rate": 7.604391891891891e-06, "loss": 0.6426, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 549 }, { "epoch": 1.385390428211587, "grad_norm": 1.0859375, "learning_rate": 7.59739071501186e-06, "loss": 0.521, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 550 }, { "epoch": 1.3879093198992443, "grad_norm": 1.1484375, "learning_rate": 7.590346702923181e-06, "loss": 0.6196, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 551 }, { "epoch": 1.3904282115869018, "grad_norm": 1.2421875, "learning_rate": 7.583259461302422e-06, "loss": 0.6191, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 552 }, { "epoch": 1.392947103274559, "grad_norm": 1.15625, "learning_rate": 7.576128590971271e-06, "loss": 0.5693, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 553 }, { "epoch": 1.3954659949622166, "grad_norm": 1.09375, "learning_rate": 7.568953687821612e-06, "loss": 0.6035, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 554 }, { "epoch": 1.397984886649874, "grad_norm": 1.0234375, "learning_rate": 7.56173434273916e-06, "loss": 0.5615, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 555 }, { "epoch": 1.4005037783375314, "grad_norm": 1.171875, "learning_rate": 7.554470141525717e-06, "loss": 0.7168, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 556 }, { "epoch": 1.4030226700251889, "grad_norm": 1.15625, "learning_rate": 7.547160664819944e-06, "loss": 0.5801, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 557 }, { "epoch": 1.4055415617128464, "grad_norm": 1.125, "learning_rate": 7.539805488016674e-06, "loss": 0.5908, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 558 }, { "epoch": 1.4080604534005037, "grad_norm": 1.0546875, "learning_rate": 7.5324041811846675e-06, "loss": 0.5557, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 559 }, { "epoch": 1.4105793450881612, "grad_norm": 1.109375, "learning_rate": 7.524956308982874e-06, "loss": 0.5801, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 560 }, { "epoch": 1.4130982367758187, "grad_norm": 1.078125, "learning_rate": 7.517461430575035e-06, "loss": 0.5801, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 561 }, { "epoch": 1.415617128463476, "grad_norm": 1.03125, "learning_rate": 7.509919099542738e-06, "loss": 0.5811, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 562 }, { "epoch": 1.4181360201511335, "grad_norm": 1.1171875, "learning_rate": 7.502328863796753e-06, "loss": 0.5918, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 563 }, { "epoch": 1.420654911838791, "grad_norm": 1.109375, "learning_rate": 7.494690265486726e-06, "loss": 0.5908, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 564 }, { "epoch": 1.4231738035264483, "grad_norm": 1.078125, "learning_rate": 7.4870028409090914e-06, "loss": 0.519, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 565 }, { "epoch": 1.4256926952141058, "grad_norm": 1.1328125, "learning_rate": 7.4792661204132534e-06, "loss": 0.5625, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 566 }, { "epoch": 1.4282115869017633, "grad_norm": 1.140625, "learning_rate": 7.471479628305932e-06, "loss": 0.5566, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 567 }, { "epoch": 1.4307304785894206, "grad_norm": 1.1015625, "learning_rate": 7.463642882753676e-06, "loss": 0.5601, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 568 }, { "epoch": 1.433249370277078, "grad_norm": 1.125, "learning_rate": 7.455755395683455e-06, "loss": 0.5454, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 569 }, { "epoch": 1.4357682619647356, "grad_norm": 1.046875, "learning_rate": 7.447816672681343e-06, "loss": 0.5957, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 570 }, { "epoch": 1.438287153652393, "grad_norm": 1.2109375, "learning_rate": 7.439826212889211e-06, "loss": 0.6152, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 571 }, { "epoch": 1.4408060453400504, "grad_norm": 1.203125, "learning_rate": 7.4317835088993834e-06, "loss": 0.6162, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 572 }, { "epoch": 1.443324937027708, "grad_norm": 1.203125, "learning_rate": 7.4236880466472295e-06, "loss": 0.5479, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 573 }, { "epoch": 1.4458438287153652, "grad_norm": 1.296875, "learning_rate": 7.415539305301645e-06, "loss": 0.542, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 574 }, { "epoch": 1.4483627204030227, "grad_norm": 1.140625, "learning_rate": 7.407336757153339e-06, "loss": 0.5635, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 575 }, { "epoch": 1.4508816120906802, "grad_norm": 1.0703125, "learning_rate": 7.399079867500921e-06, "loss": 0.5957, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 576 }, { "epoch": 1.4534005037783375, "grad_norm": 1.265625, "learning_rate": 7.390768094534711e-06, "loss": 0.623, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 577 }, { "epoch": 1.455919395465995, "grad_norm": 0.98828125, "learning_rate": 7.3824008892182295e-06, "loss": 0.5332, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 578 }, { "epoch": 1.4584382871536523, "grad_norm": 1.3046875, "learning_rate": 7.373977695167287e-06, "loss": 0.5981, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 579 }, { "epoch": 1.4609571788413098, "grad_norm": 1.1171875, "learning_rate": 7.3654979485266705e-06, "loss": 0.5723, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 580 }, { "epoch": 1.4634760705289673, "grad_norm": 1.5703125, "learning_rate": 7.356961077844312e-06, "loss": 0.6895, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 581 }, { "epoch": 1.4659949622166246, "grad_norm": 1.203125, "learning_rate": 7.348366503942921e-06, "loss": 0.5957, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 582 }, { "epoch": 1.4685138539042821, "grad_norm": 1.3046875, "learning_rate": 7.339713639788997e-06, "loss": 0.5889, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 583 }, { "epoch": 1.4710327455919394, "grad_norm": 1.046875, "learning_rate": 7.331001890359168e-06, "loss": 0.5405, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 584 }, { "epoch": 1.473551637279597, "grad_norm": 1.0390625, "learning_rate": 7.322230652503794e-06, "loss": 0.543, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 585 }, { "epoch": 1.4760705289672544, "grad_norm": 1.1015625, "learning_rate": 7.313399314807765e-06, "loss": 0.4941, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 586 }, { "epoch": 1.4785894206549117, "grad_norm": 1.140625, "learning_rate": 7.304507257448434e-06, "loss": 0.6592, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 587 }, { "epoch": 1.4811083123425692, "grad_norm": 1.0234375, "learning_rate": 7.295553852050595e-06, "loss": 0.5254, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 588 }, { "epoch": 1.4836272040302267, "grad_norm": 1.171875, "learning_rate": 7.286538461538463e-06, "loss": 0.6182, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 589 }, { "epoch": 1.486146095717884, "grad_norm": 1.1484375, "learning_rate": 7.277460439984562e-06, "loss": 0.6104, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 590 }, { "epoch": 1.4886649874055415, "grad_norm": 1.640625, "learning_rate": 7.2683191324554605e-06, "loss": 0.6328, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 591 }, { "epoch": 1.491183879093199, "grad_norm": 1.25, "learning_rate": 7.259113874854255e-06, "loss": 0.6514, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 592 }, { "epoch": 1.4937027707808563, "grad_norm": 1.2109375, "learning_rate": 7.24984399375975e-06, "loss": 0.6387, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 593 }, { "epoch": 1.4962216624685138, "grad_norm": 1.109375, "learning_rate": 7.240508806262231e-06, "loss": 0.5684, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 594 }, { "epoch": 1.4987405541561714, "grad_norm": 1.0546875, "learning_rate": 7.231107619795758e-06, "loss": 0.5933, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 595 }, { "epoch": 1.5012594458438286, "grad_norm": 3.46875, "learning_rate": 7.22163973196689e-06, "loss": 0.5352, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 596 }, { "epoch": 1.5037783375314862, "grad_norm": 1.2265625, "learning_rate": 7.212104430379746e-06, "loss": 0.5791, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 597 }, { "epoch": 1.5062972292191437, "grad_norm": 1.1484375, "learning_rate": 7.202500992457324e-06, "loss": 0.6133, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 598 }, { "epoch": 1.508816120906801, "grad_norm": 1.1328125, "learning_rate": 7.192828685258965e-06, "loss": 0.6348, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 599 }, { "epoch": 1.5113350125944585, "grad_norm": 1.3203125, "learning_rate": 7.183086765293882e-06, "loss": 0.6475, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 600 }, { "epoch": 1.513853904282116, "grad_norm": 1.2578125, "learning_rate": 7.1732744783306575e-06, "loss": 0.6533, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 601 }, { "epoch": 1.5163727959697733, "grad_norm": 1.3515625, "learning_rate": 7.163391059202577e-06, "loss": 0.6436, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 602 }, { "epoch": 1.5188916876574308, "grad_norm": 1.1328125, "learning_rate": 7.153435731608732e-06, "loss": 0.5869, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 603 }, { "epoch": 1.5214105793450883, "grad_norm": 1.3046875, "learning_rate": 7.143407707910751e-06, "loss": 0.5352, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 604 }, { "epoch": 1.5239294710327456, "grad_norm": 1.140625, "learning_rate": 7.133306188925081e-06, "loss": 0.5957, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 605 }, { "epoch": 1.526448362720403, "grad_norm": 1.484375, "learning_rate": 7.1231303637106655e-06, "loss": 0.562, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 606 }, { "epoch": 1.5289672544080606, "grad_norm": 1.1640625, "learning_rate": 7.112879409351928e-06, "loss": 0.5493, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 607 }, { "epoch": 1.5314861460957179, "grad_norm": 1.6640625, "learning_rate": 7.1025524907369295e-06, "loss": 0.5938, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 608 }, { "epoch": 1.5340050377833752, "grad_norm": 1.109375, "learning_rate": 7.092148760330578e-06, "loss": 0.5918, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 609 }, { "epoch": 1.536523929471033, "grad_norm": 1.109375, "learning_rate": 7.081667357942762e-06, "loss": 0.5254, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 610 }, { "epoch": 1.5390428211586902, "grad_norm": 1.1875, "learning_rate": 7.0711074104912574e-06, "loss": 0.5703, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 611 }, { "epoch": 1.5415617128463475, "grad_norm": 1.15625, "learning_rate": 7.060468031759298e-06, "loss": 0.543, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 612 }, { "epoch": 1.5440806045340052, "grad_norm": 1.15625, "learning_rate": 7.049748322147652e-06, "loss": 0.5547, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 613 }, { "epoch": 1.5465994962216625, "grad_norm": 1.2109375, "learning_rate": 7.038947368421053e-06, "loss": 0.5391, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 614 }, { "epoch": 1.5491183879093198, "grad_norm": 1.1171875, "learning_rate": 7.028064243448858e-06, "loss": 0.5669, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 615 }, { "epoch": 1.5516372795969773, "grad_norm": 2.1875, "learning_rate": 7.017098005939754e-06, "loss": 0.5352, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 616 }, { "epoch": 1.5541561712846348, "grad_norm": 1.0859375, "learning_rate": 7.006047700170358e-06, "loss": 0.5576, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 617 }, { "epoch": 1.556675062972292, "grad_norm": 1.1015625, "learning_rate": 6.994912355707567e-06, "loss": 0.6582, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 618 }, { "epoch": 1.5591939546599496, "grad_norm": 1.046875, "learning_rate": 6.983690987124463e-06, "loss": 0.6143, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 619 }, { "epoch": 1.561712846347607, "grad_norm": 1.1484375, "learning_rate": 6.972382593709607e-06, "loss": 0.5811, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 620 }, { "epoch": 1.5642317380352644, "grad_norm": 1.1796875, "learning_rate": 6.96098615916955e-06, "loss": 0.5005, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 621 }, { "epoch": 1.566750629722922, "grad_norm": 1.46875, "learning_rate": 6.9495006513243595e-06, "loss": 0.5732, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 622 }, { "epoch": 1.5692695214105794, "grad_norm": 1.1875, "learning_rate": 6.93792502179599e-06, "loss": 0.5938, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 623 }, { "epoch": 1.5717884130982367, "grad_norm": 1.03125, "learning_rate": 6.926258205689278e-06, "loss": 0.5659, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 624 }, { "epoch": 1.5743073047858942, "grad_norm": 1.296875, "learning_rate": 6.914499121265377e-06, "loss": 0.5557, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 625 }, { "epoch": 1.5768261964735517, "grad_norm": 1.0625, "learning_rate": 6.90264666960741e-06, "loss": 0.5498, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 626 }, { "epoch": 1.579345088161209, "grad_norm": 1.03125, "learning_rate": 6.8906997342781225e-06, "loss": 0.5771, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 627 }, { "epoch": 1.5818639798488665, "grad_norm": 1.0625, "learning_rate": 6.878657180969319e-06, "loss": 0.4541, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 628 }, { "epoch": 1.584382871536524, "grad_norm": 1.0703125, "learning_rate": 6.8665178571428565e-06, "loss": 0.5518, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 629 }, { "epoch": 1.5869017632241813, "grad_norm": 1.34375, "learning_rate": 6.854280591662931e-06, "loss": 0.5356, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 630 }, { "epoch": 1.5894206549118388, "grad_norm": 1.0546875, "learning_rate": 6.841944194419442e-06, "loss": 0.5376, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 631 }, { "epoch": 1.5919395465994963, "grad_norm": 2.296875, "learning_rate": 6.829507455942159e-06, "loss": 0.5269, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 632 }, { "epoch": 1.5944584382871536, "grad_norm": 2.125, "learning_rate": 6.816969147005443e-06, "loss": 0.6855, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 633 }, { "epoch": 1.5969773299748111, "grad_norm": 1.1328125, "learning_rate": 6.804328018223235e-06, "loss": 0.5054, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 634 }, { "epoch": 1.5994962216624686, "grad_norm": 1.1015625, "learning_rate": 6.791582799634036e-06, "loss": 0.5635, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 635 }, { "epoch": 1.602015113350126, "grad_norm": 1.140625, "learning_rate": 6.778732200275608e-06, "loss": 0.5918, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 636 }, { "epoch": 1.6045340050377834, "grad_norm": 1.171875, "learning_rate": 6.765774907749076e-06, "loss": 0.5625, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 637 }, { "epoch": 1.607052896725441, "grad_norm": 1.265625, "learning_rate": 6.752709587772116e-06, "loss": 0.5391, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 638 }, { "epoch": 1.6095717884130982, "grad_norm": 1.0546875, "learning_rate": 6.73953488372093e-06, "loss": 0.5488, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 639 }, { "epoch": 1.6120906801007555, "grad_norm": 1.1015625, "learning_rate": 6.726249416160673e-06, "loss": 0.5127, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 640 }, { "epoch": 1.6146095717884132, "grad_norm": 1.078125, "learning_rate": 6.712851782363977e-06, "loss": 0.5591, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 641 }, { "epoch": 1.6171284634760705, "grad_norm": 1.15625, "learning_rate": 6.69934055581724e-06, "loss": 0.583, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 642 }, { "epoch": 1.6196473551637278, "grad_norm": 1.0546875, "learning_rate": 6.685714285714286e-06, "loss": 0.5635, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 643 }, { "epoch": 1.6221662468513856, "grad_norm": 1.078125, "learning_rate": 6.6719714964370535e-06, "loss": 0.5181, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 644 }, { "epoch": 1.6246851385390428, "grad_norm": 1.1328125, "learning_rate": 6.658110687022901e-06, "loss": 0.5801, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 645 }, { "epoch": 1.6272040302267001, "grad_norm": 1.1484375, "learning_rate": 6.644130330618112e-06, "loss": 0.54, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 646 }, { "epoch": 1.6297229219143576, "grad_norm": 1.09375, "learning_rate": 6.630028873917227e-06, "loss": 0.5435, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 647 }, { "epoch": 1.6322418136020151, "grad_norm": 1.21875, "learning_rate": 6.615804736587724e-06, "loss": 0.6396, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 648 }, { "epoch": 1.6347607052896724, "grad_norm": 1.25, "learning_rate": 6.601456310679612e-06, "loss": 0.6768, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 649 }, { "epoch": 1.63727959697733, "grad_norm": 1.0625, "learning_rate": 6.586981960019502e-06, "loss": 0.5396, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 650 }, { "epoch": 1.6397984886649875, "grad_norm": 1.3203125, "learning_rate": 6.572380019588638e-06, "loss": 0.625, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 651 }, { "epoch": 1.6423173803526447, "grad_norm": 1.2109375, "learning_rate": 6.557648794884407e-06, "loss": 0.5977, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 652 }, { "epoch": 1.6448362720403022, "grad_norm": 1.4296875, "learning_rate": 6.542786561264822e-06, "loss": 0.5947, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 653 }, { "epoch": 1.6473551637279598, "grad_norm": 1.1640625, "learning_rate": 6.527791563275434e-06, "loss": 0.5527, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 654 }, { "epoch": 1.649874055415617, "grad_norm": 1.140625, "learning_rate": 6.512662013958125e-06, "loss": 0.5093, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 655 }, { "epoch": 1.6523929471032746, "grad_norm": 1.0625, "learning_rate": 6.497396094141211e-06, "loss": 0.5967, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 656 }, { "epoch": 1.654911838790932, "grad_norm": 1.2421875, "learning_rate": 6.481991951710262e-06, "loss": 0.5459, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 657 }, { "epoch": 1.6574307304785894, "grad_norm": 1.3515625, "learning_rate": 6.46644770085902e-06, "loss": 0.6582, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 658 }, { "epoch": 1.6599496221662469, "grad_norm": 1.171875, "learning_rate": 6.450761421319797e-06, "loss": 0.6172, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 659 }, { "epoch": 1.6624685138539044, "grad_norm": 1.203125, "learning_rate": 6.4349311575726665e-06, "loss": 0.6128, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 660 }, { "epoch": 1.6649874055415617, "grad_norm": 1.0390625, "learning_rate": 6.418954918032787e-06, "loss": 0.5444, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 661 }, { "epoch": 1.6675062972292192, "grad_norm": 1.1484375, "learning_rate": 6.402830674215132e-06, "loss": 0.4937, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 662 }, { "epoch": 1.6700251889168767, "grad_norm": 1.2109375, "learning_rate": 6.3865563598759055e-06, "loss": 0.6523, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 663 }, { "epoch": 1.672544080604534, "grad_norm": 1.484375, "learning_rate": 6.37012987012987e-06, "loss": 0.4893, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 664 }, { "epoch": 1.6750629722921915, "grad_norm": 1.0546875, "learning_rate": 6.353549060542796e-06, "loss": 0.5542, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 665 }, { "epoch": 1.677581863979849, "grad_norm": 1.1171875, "learning_rate": 6.336811746198216e-06, "loss": 0.5479, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 666 }, { "epoch": 1.6801007556675063, "grad_norm": 1.34375, "learning_rate": 6.319915700737619e-06, "loss": 0.6367, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 667 }, { "epoch": 1.6826196473551638, "grad_norm": 1.140625, "learning_rate": 6.302858655373214e-06, "loss": 0.5518, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 668 }, { "epoch": 1.6851385390428213, "grad_norm": 1.0078125, "learning_rate": 6.2856382978723405e-06, "loss": 0.5151, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 669 }, { "epoch": 1.6876574307304786, "grad_norm": 1.546875, "learning_rate": 6.26825227151256e-06, "loss": 0.5605, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 670 }, { "epoch": 1.6901763224181359, "grad_norm": 1.2109375, "learning_rate": 6.250698174006446e-06, "loss": 0.5791, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 671 }, { "epoch": 1.6926952141057936, "grad_norm": 1.078125, "learning_rate": 6.232973556395035e-06, "loss": 0.5752, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 672 }, { "epoch": 1.6952141057934509, "grad_norm": 1.109375, "learning_rate": 6.2150759219088945e-06, "loss": 0.6357, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 673 }, { "epoch": 1.6977329974811082, "grad_norm": 1.3125, "learning_rate": 6.19700272479564e-06, "loss": 0.5342, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 674 }, { "epoch": 1.700251889168766, "grad_norm": 2.015625, "learning_rate": 6.178751369112815e-06, "loss": 0.5449, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 675 }, { "epoch": 1.7027707808564232, "grad_norm": 1.296875, "learning_rate": 6.160319207484865e-06, "loss": 0.5908, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 676 }, { "epoch": 1.7052896725440805, "grad_norm": 1.203125, "learning_rate": 6.14170353982301e-06, "loss": 0.5396, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 677 }, { "epoch": 1.707808564231738, "grad_norm": 1.09375, "learning_rate": 6.122901612006671e-06, "loss": 0.5791, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 678 }, { "epoch": 1.7103274559193955, "grad_norm": 1.484375, "learning_rate": 6.10391061452514e-06, "loss": 0.5688, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 679 }, { "epoch": 1.7128463476070528, "grad_norm": 1.2265625, "learning_rate": 6.084727681078046e-06, "loss": 0.5479, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 680 }, { "epoch": 1.7153652392947103, "grad_norm": 1.1328125, "learning_rate": 6.065349887133184e-06, "loss": 0.667, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 681 }, { "epoch": 1.7178841309823678, "grad_norm": 1.15625, "learning_rate": 6.0457742484401585e-06, "loss": 0.5166, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 682 }, { "epoch": 1.720403022670025, "grad_norm": 1.078125, "learning_rate": 6.0259977194982894e-06, "loss": 0.5928, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 683 }, { "epoch": 1.7229219143576826, "grad_norm": 1.140625, "learning_rate": 6.006017191977077e-06, "loss": 0.5547, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 684 }, { "epoch": 1.7254408060453401, "grad_norm": 1.3125, "learning_rate": 5.985829493087558e-06, "loss": 0.6631, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 685 }, { "epoch": 1.7279596977329974, "grad_norm": 1.125, "learning_rate": 5.965431383902722e-06, "loss": 0.5767, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 686 }, { "epoch": 1.730478589420655, "grad_norm": 1.15625, "learning_rate": 5.944819557625147e-06, "loss": 0.6279, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 687 }, { "epoch": 1.7329974811083124, "grad_norm": 1.140625, "learning_rate": 5.923990637799883e-06, "loss": 0.5347, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 688 }, { "epoch": 1.7355163727959697, "grad_norm": 1.09375, "learning_rate": 5.902941176470587e-06, "loss": 0.5869, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 689 }, { "epoch": 1.7380352644836272, "grad_norm": 1.1796875, "learning_rate": 5.8816676522767595e-06, "loss": 0.541, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 690 }, { "epoch": 1.7405541561712847, "grad_norm": 1.140625, "learning_rate": 5.860166468489893e-06, "loss": 0.5352, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 691 }, { "epoch": 1.743073047858942, "grad_norm": 1.15625, "learning_rate": 5.838433950986252e-06, "loss": 0.6426, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 692 }, { "epoch": 1.7455919395465995, "grad_norm": 1.203125, "learning_rate": 5.8164663461538456e-06, "loss": 0.5234, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 693 }, { "epoch": 1.748110831234257, "grad_norm": 1.09375, "learning_rate": 5.794259818731118e-06, "loss": 0.6338, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 694 }, { "epoch": 1.7506297229219143, "grad_norm": 1.1015625, "learning_rate": 5.771810449574726e-06, "loss": 0.5635, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 695 }, { "epoch": 1.7531486146095718, "grad_norm": 1.109375, "learning_rate": 5.749114233353696e-06, "loss": 0.6226, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 696 }, { "epoch": 1.7556675062972293, "grad_norm": 1.0859375, "learning_rate": 5.7261670761670755e-06, "loss": 0.5293, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 697 }, { "epoch": 1.7581863979848866, "grad_norm": 1.65625, "learning_rate": 5.7029647930821494e-06, "loss": 0.5747, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 698 }, { "epoch": 1.760705289672544, "grad_norm": 1.0703125, "learning_rate": 5.679503105590063e-06, "loss": 0.5811, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 699 }, { "epoch": 1.7632241813602016, "grad_norm": 1.140625, "learning_rate": 5.65577763897564e-06, "loss": 0.5151, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 700 }, { "epoch": 1.765743073047859, "grad_norm": 1.5390625, "learning_rate": 5.63178391959799e-06, "loss": 0.5889, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 701 }, { "epoch": 1.7682619647355162, "grad_norm": 1.1171875, "learning_rate": 5.607517372078332e-06, "loss": 0.6143, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 702 }, { "epoch": 1.770780856423174, "grad_norm": 1.046875, "learning_rate": 5.5829733163913605e-06, "loss": 0.5054, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 703 }, { "epoch": 1.7732997481108312, "grad_norm": 1.171875, "learning_rate": 5.55814696485623e-06, "loss": 0.6992, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 704 }, { "epoch": 1.7758186397984885, "grad_norm": 1.140625, "learning_rate": 5.533033419023137e-06, "loss": 0.6201, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 705 }, { "epoch": 1.7783375314861463, "grad_norm": 1.109375, "learning_rate": 5.507627666451196e-06, "loss": 0.5996, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 706 }, { "epoch": 1.7808564231738035, "grad_norm": 0.98046875, "learning_rate": 5.481924577373212e-06, "loss": 0.5044, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 707 }, { "epoch": 1.7833753148614608, "grad_norm": 1.0625, "learning_rate": 5.455918901242642e-06, "loss": 0.561, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 708 }, { "epoch": 1.7858942065491183, "grad_norm": 1.1953125, "learning_rate": 5.429605263157894e-06, "loss": 0.5664, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 709 }, { "epoch": 1.7884130982367759, "grad_norm": 1.1484375, "learning_rate": 5.402978160158835e-06, "loss": 0.6094, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 710 }, { "epoch": 1.7909319899244331, "grad_norm": 1.078125, "learning_rate": 5.376031957390146e-06, "loss": 0.5884, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 711 }, { "epoch": 1.7934508816120907, "grad_norm": 1.0625, "learning_rate": 5.348760884125921e-06, "loss": 0.6143, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 712 }, { "epoch": 1.7959697732997482, "grad_norm": 1.1875, "learning_rate": 5.321159029649595e-06, "loss": 0.4966, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 713 }, { "epoch": 1.7984886649874054, "grad_norm": 1.234375, "learning_rate": 5.2932203389830515e-06, "loss": 0.6895, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 714 }, { "epoch": 1.801007556675063, "grad_norm": 1.0078125, "learning_rate": 5.264938608458389e-06, "loss": 0.4907, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 715 }, { "epoch": 1.8035264483627205, "grad_norm": 1.0625, "learning_rate": 5.236307481125599e-06, "loss": 0.5654, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 716 }, { "epoch": 1.8060453400503778, "grad_norm": 1.0625, "learning_rate": 5.20732044198895e-06, "loss": 0.4849, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 717 }, { "epoch": 1.8085642317380353, "grad_norm": 1.0390625, "learning_rate": 5.177970813064629e-06, "loss": 0.584, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 718 }, { "epoch": 1.8110831234256928, "grad_norm": 1.078125, "learning_rate": 5.1482517482517486e-06, "loss": 0.5273, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 719 }, { "epoch": 1.81360201511335, "grad_norm": 1.0078125, "learning_rate": 5.118156228008445e-06, "loss": 0.5342, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 720 }, { "epoch": 1.8161209068010076, "grad_norm": 1.078125, "learning_rate": 5.087677053824363e-06, "loss": 0.5098, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 721 }, { "epoch": 1.818639798488665, "grad_norm": 1.453125, "learning_rate": 5.0568068424803995e-06, "loss": 0.5674, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 722 }, { "epoch": 1.8211586901763224, "grad_norm": 1.078125, "learning_rate": 5.025538020086083e-06, "loss": 0.564, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 723 }, { "epoch": 1.8236775818639799, "grad_norm": 1.046875, "learning_rate": 4.993862815884477e-06, "loss": 0.5361, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 724 }, { "epoch": 1.8261964735516374, "grad_norm": 0.98046875, "learning_rate": 4.961773255813954e-06, "loss": 0.4805, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 725 }, { "epoch": 1.8287153652392947, "grad_norm": 1.171875, "learning_rate": 4.929261155815655e-06, "loss": 0.5068, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 726 }, { "epoch": 1.8312342569269522, "grad_norm": 1.09375, "learning_rate": 4.896318114874816e-06, "loss": 0.4937, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 727 }, { "epoch": 1.8337531486146097, "grad_norm": 1.1640625, "learning_rate": 4.862935507783544e-06, "loss": 0.5449, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 728 }, { "epoch": 1.836272040302267, "grad_norm": 1.2109375, "learning_rate": 4.82910447761194e-06, "loss": 0.6113, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 729 }, { "epoch": 1.8387909319899243, "grad_norm": 1.34375, "learning_rate": 4.794815927873779e-06, "loss": 0.584, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 730 }, { "epoch": 1.841309823677582, "grad_norm": 1.3671875, "learning_rate": 4.760060514372164e-06, "loss": 0.6162, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 731 }, { "epoch": 1.8438287153652393, "grad_norm": 1.328125, "learning_rate": 4.724828636709825e-06, "loss": 0.5566, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 732 }, { "epoch": 1.8463476070528966, "grad_norm": 1.2734375, "learning_rate": 4.689110429447854e-06, "loss": 0.6641, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 733 }, { "epoch": 1.8488664987405543, "grad_norm": 1.1796875, "learning_rate": 4.652895752895753e-06, "loss": 0.5552, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 734 }, { "epoch": 1.8513853904282116, "grad_norm": 1.0703125, "learning_rate": 4.616174183514774e-06, "loss": 0.5347, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 735 }, { "epoch": 1.8539042821158689, "grad_norm": 1.109375, "learning_rate": 4.578935003915427e-06, "loss": 0.563, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 736 }, { "epoch": 1.8564231738035264, "grad_norm": 1.703125, "learning_rate": 4.5411671924290224e-06, "loss": 0.585, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 737 }, { "epoch": 1.858942065491184, "grad_norm": 1.0859375, "learning_rate": 4.502859412231931e-06, "loss": 0.6113, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 738 }, { "epoch": 1.8614609571788412, "grad_norm": 1.1484375, "learning_rate": 4.464e-06, "loss": 0.73, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 739 }, { "epoch": 1.8639798488664987, "grad_norm": 1.265625, "learning_rate": 4.424576954069299e-06, "loss": 0.5967, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 740 }, { "epoch": 1.8664987405541562, "grad_norm": 1.21875, "learning_rate": 4.384577922077922e-06, "loss": 0.6104, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 741 }, { "epoch": 1.8690176322418135, "grad_norm": 1.234375, "learning_rate": 4.343990188062142e-06, "loss": 0.5493, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 742 }, { "epoch": 1.871536523929471, "grad_norm": 1.28125, "learning_rate": 4.302800658978583e-06, "loss": 0.5752, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 743 }, { "epoch": 1.8740554156171285, "grad_norm": 1.09375, "learning_rate": 4.260995850622407e-06, "loss": 0.5981, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 744 }, { "epoch": 1.8765743073047858, "grad_norm": 1.234375, "learning_rate": 4.218561872909699e-06, "loss": 0.5576, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 745 }, { "epoch": 1.8790931989924433, "grad_norm": 1.046875, "learning_rate": 4.1754844144903115e-06, "loss": 0.5034, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 746 }, { "epoch": 1.8816120906801008, "grad_norm": 0.984375, "learning_rate": 4.131748726655348e-06, "loss": 0.5176, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 747 }, { "epoch": 1.8841309823677581, "grad_norm": 1.0625, "learning_rate": 4.087339606501283e-06, "loss": 0.5635, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 748 }, { "epoch": 1.8866498740554156, "grad_norm": 1.0703125, "learning_rate": 4.042241379310345e-06, "loss": 0.624, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 749 }, { "epoch": 1.8891687657430731, "grad_norm": 1.078125, "learning_rate": 3.996437880104258e-06, "loss": 0.5171, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 750 }, { "epoch": 1.8916876574307304, "grad_norm": 1.0234375, "learning_rate": 3.949912434325744e-06, "loss": 0.5415, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 751 }, { "epoch": 1.894206549118388, "grad_norm": 1.109375, "learning_rate": 3.902647837599294e-06, "loss": 0.6504, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 752 }, { "epoch": 1.8967254408060454, "grad_norm": 1.046875, "learning_rate": 3.854626334519574e-06, "loss": 0.5332, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 753 }, { "epoch": 1.8992443324937027, "grad_norm": 1.0703125, "learning_rate": 3.8058295964125566e-06, "loss": 0.5469, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 754 }, { "epoch": 1.9017632241813602, "grad_norm": 1.0625, "learning_rate": 3.75623869801085e-06, "loss": 0.627, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 755 }, { "epoch": 1.9042821158690177, "grad_norm": 1.140625, "learning_rate": 3.7058340929808575e-06, "loss": 0.4717, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 756 }, { "epoch": 1.906801007556675, "grad_norm": 1.1328125, "learning_rate": 3.6545955882352945e-06, "loss": 0.5918, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 757 }, { "epoch": 1.9093198992443325, "grad_norm": 1.03125, "learning_rate": 3.6025023169601484e-06, "loss": 0.5396, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 758 }, { "epoch": 1.91183879093199, "grad_norm": 1.0703125, "learning_rate": 3.5495327102803733e-06, "loss": 0.6538, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 759 }, { "epoch": 1.9143576826196473, "grad_norm": 1.171875, "learning_rate": 3.4956644674835065e-06, "loss": 0.6675, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 760 }, { "epoch": 1.9168765743073046, "grad_norm": 1.03125, "learning_rate": 3.4408745247148288e-06, "loss": 0.5562, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 761 }, { "epoch": 1.9193954659949624, "grad_norm": 1.015625, "learning_rate": 3.3851390220517743e-06, "loss": 0.5244, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 762 }, { "epoch": 1.9219143576826196, "grad_norm": 1.1328125, "learning_rate": 3.3284332688588005e-06, "loss": 0.5225, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 763 }, { "epoch": 1.924433249370277, "grad_norm": 1.046875, "learning_rate": 3.2707317073170743e-06, "loss": 0.5762, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 764 }, { "epoch": 1.9269521410579347, "grad_norm": 1.03125, "learning_rate": 3.212007874015748e-06, "loss": 0.5317, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 765 }, { "epoch": 1.929471032745592, "grad_norm": 1.1796875, "learning_rate": 3.1522343594836146e-06, "loss": 0.6099, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 766 }, { "epoch": 1.9319899244332492, "grad_norm": 1.0, "learning_rate": 3.0913827655310624e-06, "loss": 0.5562, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 767 }, { "epoch": 1.9345088161209067, "grad_norm": 1.140625, "learning_rate": 3.029423660262892e-06, "loss": 0.519, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 768 }, { "epoch": 1.9370277078085643, "grad_norm": 1.1875, "learning_rate": 2.966326530612245e-06, "loss": 0.7002, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 769 }, { "epoch": 1.9395465994962215, "grad_norm": 1.0234375, "learning_rate": 2.9020597322348096e-06, "loss": 0.5332, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 770 }, { "epoch": 1.942065491183879, "grad_norm": 0.96875, "learning_rate": 2.8365904365904367e-06, "loss": 0.5713, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 771 }, { "epoch": 1.9445843828715366, "grad_norm": 1.203125, "learning_rate": 2.7698845750262335e-06, "loss": 0.5391, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 772 }, { "epoch": 1.9471032745591939, "grad_norm": 1.59375, "learning_rate": 2.701906779661017e-06, "loss": 0.543, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 773 }, { "epoch": 1.9496221662468514, "grad_norm": 1.0546875, "learning_rate": 2.6326203208556147e-06, "loss": 0.5781, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 774 }, { "epoch": 1.9521410579345089, "grad_norm": 1.4453125, "learning_rate": 2.5619870410367173e-06, "loss": 0.522, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 775 }, { "epoch": 1.9546599496221662, "grad_norm": 1.484375, "learning_rate": 2.4899672846237735e-06, "loss": 0.6621, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 776 }, { "epoch": 1.9571788413098237, "grad_norm": 0.984375, "learning_rate": 2.4165198237885463e-06, "loss": 0.5815, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 777 }, { "epoch": 1.9596977329974812, "grad_norm": 0.984375, "learning_rate": 2.341601779755284e-06, "loss": 0.5122, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 778 }, { "epoch": 1.9622166246851385, "grad_norm": 1.0078125, "learning_rate": 2.265168539325843e-06, "loss": 0.6152, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 779 }, { "epoch": 1.964735516372796, "grad_norm": 1.078125, "learning_rate": 2.187173666288309e-06, "loss": 0.5112, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 780 }, { "epoch": 1.9672544080604535, "grad_norm": 1.0078125, "learning_rate": 2.1075688073394493e-06, "loss": 0.644, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 781 }, { "epoch": 1.9697732997481108, "grad_norm": 0.95703125, "learning_rate": 2.0263035921205102e-06, "loss": 0.4868, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 782 }, { "epoch": 1.9722921914357683, "grad_norm": 1.015625, "learning_rate": 1.9433255269320845e-06, "loss": 0.5225, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 783 }, { "epoch": 1.9748110831234258, "grad_norm": 1.0, "learning_rate": 1.858579881656805e-06, "loss": 0.5278, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 784 }, { "epoch": 1.977329974811083, "grad_norm": 1.1484375, "learning_rate": 1.7720095693779906e-06, "loss": 0.5791, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 785 }, { "epoch": 1.9798488664987406, "grad_norm": 0.95703125, "learning_rate": 1.683555018137848e-06, "loss": 0.5352, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 786 }, { "epoch": 1.982367758186398, "grad_norm": 1.078125, "learning_rate": 1.5931540342298292e-06, "loss": 0.5562, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 787 }, { "epoch": 1.9848866498740554, "grad_norm": 1.2109375, "learning_rate": 1.500741656365884e-06, "loss": 0.5269, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 788 }, { "epoch": 1.987405541561713, "grad_norm": 1.0390625, "learning_rate": 1.40625e-06, "loss": 0.5869, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 789 }, { "epoch": 1.9899244332493704, "grad_norm": 1.0234375, "learning_rate": 1.3096080910240203e-06, "loss": 0.5107, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 790 }, { "epoch": 1.9924433249370277, "grad_norm": 1.453125, "learning_rate": 1.2107416879795398e-06, "loss": 0.5762, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 791 }, { "epoch": 1.994962216624685, "grad_norm": 1.125, "learning_rate": 1.1095730918499354e-06, "loss": 0.6152, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 792 }, { "epoch": 1.9974811083123427, "grad_norm": 1.0703125, "learning_rate": 1.006020942408377e-06, "loss": 0.6211, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 793 }, { "epoch": 2.0, "grad_norm": 1.0078125, "learning_rate": 9.000000000000001e-07, "loss": 0.6016, "memory/device_mem_reserved(gib)": 101.22, "memory/max_mem_active(gib)": 87.86, "memory/max_mem_allocated(gib)": 87.86, "step": 794 } ], "logging_steps": 1, "max_steps": 794, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 199, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.33575824886842e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }