{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 11853, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 1.0227874517440796, "eval_runtime": 1025.9766, "eval_samples_per_second": 41.442, "eval_steps_per_second": 0.648, "step": 0 }, { "epoch": 0.0002531004808909137, "grad_norm": 1.5422555208206177, "learning_rate": 0.0, "loss": 0.7739, "step": 1 }, { "epoch": 0.0005062009617818274, "grad_norm": 1.7739096879959106, "learning_rate": 2.0000000000000002e-07, "loss": 0.7967, "step": 2 }, { "epoch": 0.0007593014426727411, "grad_norm": 1.5074769258499146, "learning_rate": 4.0000000000000003e-07, "loss": 0.7494, "step": 3 }, { "epoch": 0.0010124019235636548, "grad_norm": 1.5649176836013794, "learning_rate": 6.000000000000001e-07, "loss": 0.7765, "step": 4 }, { "epoch": 0.0012655024044545685, "grad_norm": 1.6183850765228271, "learning_rate": 8.000000000000001e-07, "loss": 0.7806, "step": 5 }, { "epoch": 0.0015186028853454822, "grad_norm": 1.4134117364883423, "learning_rate": 1.0000000000000002e-06, "loss": 0.7581, "step": 6 }, { "epoch": 0.001771703366236396, "grad_norm": 1.2370158433914185, "learning_rate": 1.2000000000000002e-06, "loss": 0.7565, "step": 7 }, { "epoch": 0.0020248038471273096, "grad_norm": 1.086237907409668, "learning_rate": 1.4000000000000001e-06, "loss": 0.7564, "step": 8 }, { "epoch": 0.002277904328018223, "grad_norm": 0.9099379777908325, "learning_rate": 1.6000000000000001e-06, "loss": 0.7606, "step": 9 }, { "epoch": 0.002531004808909137, "grad_norm": 0.7214070558547974, "learning_rate": 1.8000000000000001e-06, "loss": 0.7334, "step": 10 }, { "epoch": 0.0027841052898000505, "grad_norm": 0.5764913558959961, "learning_rate": 2.0000000000000003e-06, "loss": 0.7254, "step": 11 }, { "epoch": 0.0030372057706909645, "grad_norm": 0.4785311222076416, "learning_rate": 2.2e-06, "loss": 0.7321, "step": 12 }, { "epoch": 0.003290306251581878, "grad_norm": 0.39615628123283386, "learning_rate": 2.4000000000000003e-06, "loss": 0.7124, "step": 13 }, { "epoch": 0.003543406732472792, "grad_norm": 0.3793116509914398, "learning_rate": 2.6e-06, "loss": 0.6932, "step": 14 }, { "epoch": 0.0037965072133637054, "grad_norm": 0.3727288842201233, "learning_rate": 2.8000000000000003e-06, "loss": 0.7294, "step": 15 }, { "epoch": 0.004049607694254619, "grad_norm": 0.40745407342910767, "learning_rate": 3e-06, "loss": 0.705, "step": 16 }, { "epoch": 0.004302708175145533, "grad_norm": 0.42191222310066223, "learning_rate": 3.2000000000000003e-06, "loss": 0.7075, "step": 17 }, { "epoch": 0.004555808656036446, "grad_norm": 0.40793243050575256, "learning_rate": 3.4000000000000005e-06, "loss": 0.7056, "step": 18 }, { "epoch": 0.00480890913692736, "grad_norm": 0.40160873532295227, "learning_rate": 3.6000000000000003e-06, "loss": 0.72, "step": 19 }, { "epoch": 0.005062009617818274, "grad_norm": 0.3526245951652527, "learning_rate": 3.8000000000000005e-06, "loss": 0.7102, "step": 20 }, { "epoch": 0.005315110098709187, "grad_norm": 0.3622744381427765, "learning_rate": 4.000000000000001e-06, "loss": 0.6996, "step": 21 }, { "epoch": 0.005568210579600101, "grad_norm": 0.3431853950023651, "learning_rate": 4.2000000000000004e-06, "loss": 0.6969, "step": 22 }, { "epoch": 0.005821311060491015, "grad_norm": 0.28951337933540344, "learning_rate": 4.4e-06, "loss": 0.7094, "step": 23 }, { "epoch": 0.006074411541381929, "grad_norm": 0.28734734654426575, "learning_rate": 4.600000000000001e-06, "loss": 0.6972, "step": 24 }, { "epoch": 0.006327512022272842, "grad_norm": 0.20745716989040375, "learning_rate": 4.800000000000001e-06, "loss": 0.6743, "step": 25 }, { "epoch": 0.006580612503163756, "grad_norm": 0.20546671748161316, "learning_rate": 5e-06, "loss": 0.6905, "step": 26 }, { "epoch": 0.00683371298405467, "grad_norm": 0.2157692164182663, "learning_rate": 5.2e-06, "loss": 0.6685, "step": 27 }, { "epoch": 0.007086813464945584, "grad_norm": 0.20869505405426025, "learning_rate": 5.400000000000001e-06, "loss": 0.6728, "step": 28 }, { "epoch": 0.007339913945836497, "grad_norm": 0.22692961990833282, "learning_rate": 5.600000000000001e-06, "loss": 0.6878, "step": 29 }, { "epoch": 0.007593014426727411, "grad_norm": 0.2152116298675537, "learning_rate": 5.8e-06, "loss": 0.6974, "step": 30 }, { "epoch": 0.007846114907618324, "grad_norm": 0.21653513610363007, "learning_rate": 6e-06, "loss": 0.6917, "step": 31 }, { "epoch": 0.008099215388509239, "grad_norm": 0.22303710877895355, "learning_rate": 6.200000000000001e-06, "loss": 0.7065, "step": 32 }, { "epoch": 0.008352315869400152, "grad_norm": 0.21773776412010193, "learning_rate": 6.4000000000000006e-06, "loss": 0.6777, "step": 33 }, { "epoch": 0.008605416350291066, "grad_norm": 0.19383567571640015, "learning_rate": 6.600000000000001e-06, "loss": 0.7071, "step": 34 }, { "epoch": 0.00885851683118198, "grad_norm": 0.17862704396247864, "learning_rate": 6.800000000000001e-06, "loss": 0.6903, "step": 35 }, { "epoch": 0.009111617312072893, "grad_norm": 0.18009145557880402, "learning_rate": 7e-06, "loss": 0.6597, "step": 36 }, { "epoch": 0.009364717792963807, "grad_norm": 0.28171759843826294, "learning_rate": 7.2000000000000005e-06, "loss": 0.6641, "step": 37 }, { "epoch": 0.00961781827385472, "grad_norm": 0.18906806409358978, "learning_rate": 7.4e-06, "loss": 0.6988, "step": 38 }, { "epoch": 0.009870918754745633, "grad_norm": 0.24441087245941162, "learning_rate": 7.600000000000001e-06, "loss": 0.6586, "step": 39 }, { "epoch": 0.010124019235636548, "grad_norm": 0.19522865116596222, "learning_rate": 7.800000000000002e-06, "loss": 0.6526, "step": 40 }, { "epoch": 0.010377119716527461, "grad_norm": 0.21378129720687866, "learning_rate": 8.000000000000001e-06, "loss": 0.6883, "step": 41 }, { "epoch": 0.010630220197418374, "grad_norm": 0.1778658777475357, "learning_rate": 8.2e-06, "loss": 0.6701, "step": 42 }, { "epoch": 0.010883320678309289, "grad_norm": 0.16810527443885803, "learning_rate": 8.400000000000001e-06, "loss": 0.6445, "step": 43 }, { "epoch": 0.011136421159200202, "grad_norm": 0.18885721266269684, "learning_rate": 8.6e-06, "loss": 0.6977, "step": 44 }, { "epoch": 0.011389521640091117, "grad_norm": 0.19532494246959686, "learning_rate": 8.8e-06, "loss": 0.6694, "step": 45 }, { "epoch": 0.01164262212098203, "grad_norm": 0.1723746508359909, "learning_rate": 9e-06, "loss": 0.6703, "step": 46 }, { "epoch": 0.011895722601872943, "grad_norm": 0.18233156204223633, "learning_rate": 9.200000000000002e-06, "loss": 0.6674, "step": 47 }, { "epoch": 0.012148823082763858, "grad_norm": 0.18409250676631927, "learning_rate": 9.4e-06, "loss": 0.6562, "step": 48 }, { "epoch": 0.012401923563654771, "grad_norm": 0.1799033135175705, "learning_rate": 9.600000000000001e-06, "loss": 0.6391, "step": 49 }, { "epoch": 0.012655024044545684, "grad_norm": 0.17093200981616974, "learning_rate": 9.800000000000001e-06, "loss": 0.6609, "step": 50 }, { "epoch": 0.012908124525436599, "grad_norm": 0.16845975816249847, "learning_rate": 1e-05, "loss": 0.6591, "step": 51 }, { "epoch": 0.013161225006327512, "grad_norm": 0.16641423106193542, "learning_rate": 1.02e-05, "loss": 0.6759, "step": 52 }, { "epoch": 0.013414325487218427, "grad_norm": 0.19389550387859344, "learning_rate": 1.04e-05, "loss": 0.6882, "step": 53 }, { "epoch": 0.01366742596810934, "grad_norm": 0.16377364099025726, "learning_rate": 1.0600000000000002e-05, "loss": 0.6783, "step": 54 }, { "epoch": 0.013920526449000253, "grad_norm": 0.1614464521408081, "learning_rate": 1.0800000000000002e-05, "loss": 0.6597, "step": 55 }, { "epoch": 0.014173626929891167, "grad_norm": 0.18549728393554688, "learning_rate": 1.1000000000000001e-05, "loss": 0.6742, "step": 56 }, { "epoch": 0.01442672741078208, "grad_norm": 0.16043926775455475, "learning_rate": 1.1200000000000001e-05, "loss": 0.6525, "step": 57 }, { "epoch": 0.014679827891672994, "grad_norm": 0.1643228679895401, "learning_rate": 1.14e-05, "loss": 0.6534, "step": 58 }, { "epoch": 0.014932928372563908, "grad_norm": 0.1760501265525818, "learning_rate": 1.16e-05, "loss": 0.6384, "step": 59 }, { "epoch": 0.015186028853454821, "grad_norm": 0.1705779880285263, "learning_rate": 1.18e-05, "loss": 0.6611, "step": 60 }, { "epoch": 0.015439129334345734, "grad_norm": 0.16293965280056, "learning_rate": 1.2e-05, "loss": 0.6395, "step": 61 }, { "epoch": 0.015692229815236648, "grad_norm": 0.1599552184343338, "learning_rate": 1.22e-05, "loss": 0.6309, "step": 62 }, { "epoch": 0.015945330296127564, "grad_norm": 0.16157129406929016, "learning_rate": 1.2400000000000002e-05, "loss": 0.6506, "step": 63 }, { "epoch": 0.016198430777018477, "grad_norm": 0.16981346905231476, "learning_rate": 1.2600000000000001e-05, "loss": 0.6367, "step": 64 }, { "epoch": 0.01645153125790939, "grad_norm": 0.16686077415943146, "learning_rate": 1.2800000000000001e-05, "loss": 0.6335, "step": 65 }, { "epoch": 0.016704631738800303, "grad_norm": 0.16791872680187225, "learning_rate": 1.3000000000000001e-05, "loss": 0.659, "step": 66 }, { "epoch": 0.016957732219691216, "grad_norm": 0.18651379644870758, "learning_rate": 1.3200000000000002e-05, "loss": 0.6621, "step": 67 }, { "epoch": 0.017210832700582133, "grad_norm": 0.16107667982578278, "learning_rate": 1.3400000000000002e-05, "loss": 0.6526, "step": 68 }, { "epoch": 0.017463933181473046, "grad_norm": 0.19045163691043854, "learning_rate": 1.3600000000000002e-05, "loss": 0.6356, "step": 69 }, { "epoch": 0.01771703366236396, "grad_norm": 0.1727728545665741, "learning_rate": 1.38e-05, "loss": 0.6321, "step": 70 }, { "epoch": 0.017970134143254872, "grad_norm": 0.16364595293998718, "learning_rate": 1.4e-05, "loss": 0.6547, "step": 71 }, { "epoch": 0.018223234624145785, "grad_norm": 0.1609407365322113, "learning_rate": 1.4200000000000001e-05, "loss": 0.6085, "step": 72 }, { "epoch": 0.018476335105036698, "grad_norm": 0.1595495045185089, "learning_rate": 1.4400000000000001e-05, "loss": 0.6321, "step": 73 }, { "epoch": 0.018729435585927615, "grad_norm": 0.19799435138702393, "learning_rate": 1.46e-05, "loss": 0.6253, "step": 74 }, { "epoch": 0.018982536066818528, "grad_norm": 0.16346189379692078, "learning_rate": 1.48e-05, "loss": 0.6364, "step": 75 }, { "epoch": 0.01923563654770944, "grad_norm": 0.16418568789958954, "learning_rate": 1.5000000000000002e-05, "loss": 0.6375, "step": 76 }, { "epoch": 0.019488737028600354, "grad_norm": 0.18783988058567047, "learning_rate": 1.5200000000000002e-05, "loss": 0.6447, "step": 77 }, { "epoch": 0.019741837509491267, "grad_norm": 0.19690784811973572, "learning_rate": 1.54e-05, "loss": 0.6385, "step": 78 }, { "epoch": 0.019994937990382183, "grad_norm": 0.17404019832611084, "learning_rate": 1.5600000000000003e-05, "loss": 0.6358, "step": 79 }, { "epoch": 0.020248038471273096, "grad_norm": 0.1718037873506546, "learning_rate": 1.58e-05, "loss": 0.6317, "step": 80 }, { "epoch": 0.02050113895216401, "grad_norm": 0.1750226616859436, "learning_rate": 1.6000000000000003e-05, "loss": 0.6287, "step": 81 }, { "epoch": 0.020754239433054922, "grad_norm": 0.17224791646003723, "learning_rate": 1.62e-05, "loss": 0.6459, "step": 82 }, { "epoch": 0.021007339913945836, "grad_norm": 0.18394415080547333, "learning_rate": 1.64e-05, "loss": 0.6467, "step": 83 }, { "epoch": 0.02126044039483675, "grad_norm": 0.16639743745326996, "learning_rate": 1.66e-05, "loss": 0.6748, "step": 84 }, { "epoch": 0.021513540875727665, "grad_norm": 0.18489432334899902, "learning_rate": 1.6800000000000002e-05, "loss": 0.6451, "step": 85 }, { "epoch": 0.021766641356618578, "grad_norm": 0.165752574801445, "learning_rate": 1.7e-05, "loss": 0.6058, "step": 86 }, { "epoch": 0.02201974183750949, "grad_norm": 0.18663623929023743, "learning_rate": 1.72e-05, "loss": 0.6741, "step": 87 }, { "epoch": 0.022272842318400404, "grad_norm": 0.18166765570640564, "learning_rate": 1.7400000000000003e-05, "loss": 0.6332, "step": 88 }, { "epoch": 0.022525942799291317, "grad_norm": 0.17642953991889954, "learning_rate": 1.76e-05, "loss": 0.6135, "step": 89 }, { "epoch": 0.022779043280182234, "grad_norm": 0.1743820458650589, "learning_rate": 1.7800000000000002e-05, "loss": 0.6025, "step": 90 }, { "epoch": 0.023032143761073147, "grad_norm": 0.17671117186546326, "learning_rate": 1.8e-05, "loss": 0.6119, "step": 91 }, { "epoch": 0.02328524424196406, "grad_norm": 0.17659315466880798, "learning_rate": 1.8200000000000002e-05, "loss": 0.6435, "step": 92 }, { "epoch": 0.023538344722854973, "grad_norm": 0.176067516207695, "learning_rate": 1.8400000000000003e-05, "loss": 0.6242, "step": 93 }, { "epoch": 0.023791445203745886, "grad_norm": 0.177460178732872, "learning_rate": 1.86e-05, "loss": 0.6531, "step": 94 }, { "epoch": 0.0240445456846368, "grad_norm": 0.21800711750984192, "learning_rate": 1.88e-05, "loss": 0.5926, "step": 95 }, { "epoch": 0.024297646165527716, "grad_norm": 0.17698881030082703, "learning_rate": 1.9e-05, "loss": 0.6201, "step": 96 }, { "epoch": 0.02455074664641863, "grad_norm": 0.17561809718608856, "learning_rate": 1.9200000000000003e-05, "loss": 0.6235, "step": 97 }, { "epoch": 0.024803847127309542, "grad_norm": 0.17871560156345367, "learning_rate": 1.94e-05, "loss": 0.6317, "step": 98 }, { "epoch": 0.025056947608200455, "grad_norm": 0.17323796451091766, "learning_rate": 1.9600000000000002e-05, "loss": 0.614, "step": 99 }, { "epoch": 0.025310048089091368, "grad_norm": 0.1735820323228836, "learning_rate": 1.98e-05, "loss": 0.6155, "step": 100 }, { "epoch": 0.025563148569982284, "grad_norm": 0.18773189187049866, "learning_rate": 2e-05, "loss": 0.6282, "step": 101 }, { "epoch": 0.025816249050873197, "grad_norm": 0.17454087734222412, "learning_rate": 1.999999971336061e-05, "loss": 0.6495, "step": 102 }, { "epoch": 0.02606934953176411, "grad_norm": 0.17310722172260284, "learning_rate": 1.999999885344244e-05, "loss": 0.664, "step": 103 }, { "epoch": 0.026322450012655024, "grad_norm": 0.17041310667991638, "learning_rate": 1.9999997420245554e-05, "loss": 0.5958, "step": 104 }, { "epoch": 0.026575550493545937, "grad_norm": 0.17476394772529602, "learning_rate": 1.999999541377003e-05, "loss": 0.6042, "step": 105 }, { "epoch": 0.026828650974436853, "grad_norm": 0.18969106674194336, "learning_rate": 1.999999283401598e-05, "loss": 0.6414, "step": 106 }, { "epoch": 0.027081751455327766, "grad_norm": 0.17666743695735931, "learning_rate": 1.999998968098355e-05, "loss": 0.6095, "step": 107 }, { "epoch": 0.02733485193621868, "grad_norm": 0.16765183210372925, "learning_rate": 1.9999985954672926e-05, "loss": 0.6298, "step": 108 }, { "epoch": 0.027587952417109592, "grad_norm": 0.16784022748470306, "learning_rate": 1.999998165508432e-05, "loss": 0.6207, "step": 109 }, { "epoch": 0.027841052898000505, "grad_norm": 0.1832384467124939, "learning_rate": 1.999997678221798e-05, "loss": 0.643, "step": 110 }, { "epoch": 0.02809415337889142, "grad_norm": 0.18284334242343903, "learning_rate": 1.9999971336074178e-05, "loss": 0.6085, "step": 111 }, { "epoch": 0.028347253859782335, "grad_norm": 0.25969794392585754, "learning_rate": 1.9999965316653238e-05, "loss": 0.591, "step": 112 }, { "epoch": 0.028600354340673248, "grad_norm": 0.17420271039009094, "learning_rate": 1.9999958723955496e-05, "loss": 0.6081, "step": 113 }, { "epoch": 0.02885345482156416, "grad_norm": 0.17406368255615234, "learning_rate": 1.999995155798133e-05, "loss": 0.6538, "step": 114 }, { "epoch": 0.029106555302455074, "grad_norm": 0.2751551866531372, "learning_rate": 1.9999943818731156e-05, "loss": 0.5929, "step": 115 }, { "epoch": 0.029359655783345987, "grad_norm": 0.1678651124238968, "learning_rate": 1.9999935506205416e-05, "loss": 0.6078, "step": 116 }, { "epoch": 0.029612756264236904, "grad_norm": 0.1709502935409546, "learning_rate": 1.9999926620404585e-05, "loss": 0.5928, "step": 117 }, { "epoch": 0.029865856745127817, "grad_norm": 0.17515309154987335, "learning_rate": 1.9999917161329173e-05, "loss": 0.6355, "step": 118 }, { "epoch": 0.03011895722601873, "grad_norm": 0.17262603342533112, "learning_rate": 1.9999907128979723e-05, "loss": 0.5745, "step": 119 }, { "epoch": 0.030372057706909643, "grad_norm": 0.18772061169147491, "learning_rate": 1.999989652335681e-05, "loss": 0.6168, "step": 120 }, { "epoch": 0.030625158187800556, "grad_norm": 0.17533883452415466, "learning_rate": 1.999988534446104e-05, "loss": 0.6442, "step": 121 }, { "epoch": 0.03087825866869147, "grad_norm": 0.16918431222438812, "learning_rate": 1.9999873592293054e-05, "loss": 0.6262, "step": 122 }, { "epoch": 0.031131359149582385, "grad_norm": 0.17905767261981964, "learning_rate": 1.999986126685353e-05, "loss": 0.6213, "step": 123 }, { "epoch": 0.031384459630473295, "grad_norm": 0.1797955334186554, "learning_rate": 1.999984836814317e-05, "loss": 0.6448, "step": 124 }, { "epoch": 0.03163756011136421, "grad_norm": 0.17527543008327484, "learning_rate": 1.9999834896162716e-05, "loss": 0.6416, "step": 125 }, { "epoch": 0.03189066059225513, "grad_norm": 0.18013723194599152, "learning_rate": 1.999982085091294e-05, "loss": 0.6293, "step": 126 }, { "epoch": 0.03214376107314604, "grad_norm": 0.27788278460502625, "learning_rate": 1.9999806232394644e-05, "loss": 0.6174, "step": 127 }, { "epoch": 0.032396861554036954, "grad_norm": 0.17403407394886017, "learning_rate": 1.9999791040608674e-05, "loss": 0.6236, "step": 128 }, { "epoch": 0.032649962034927864, "grad_norm": 0.18010959029197693, "learning_rate": 1.999977527555589e-05, "loss": 0.6268, "step": 129 }, { "epoch": 0.03290306251581878, "grad_norm": 0.168148934841156, "learning_rate": 1.9999758937237206e-05, "loss": 0.5944, "step": 130 }, { "epoch": 0.0331561629967097, "grad_norm": 0.18338632583618164, "learning_rate": 1.999974202565355e-05, "loss": 0.584, "step": 131 }, { "epoch": 0.033409263477600606, "grad_norm": 0.17366443574428558, "learning_rate": 1.9999724540805898e-05, "loss": 0.6433, "step": 132 }, { "epoch": 0.03366236395849152, "grad_norm": 0.16738726198673248, "learning_rate": 1.9999706482695248e-05, "loss": 0.6111, "step": 133 }, { "epoch": 0.03391546443938243, "grad_norm": 0.17759299278259277, "learning_rate": 1.999968785132264e-05, "loss": 0.6017, "step": 134 }, { "epoch": 0.03416856492027335, "grad_norm": 0.1689685583114624, "learning_rate": 1.9999668646689137e-05, "loss": 0.6109, "step": 135 }, { "epoch": 0.034421665401164266, "grad_norm": 0.25977635383605957, "learning_rate": 1.9999648868795845e-05, "loss": 0.6312, "step": 136 }, { "epoch": 0.034674765882055175, "grad_norm": 0.1700809746980667, "learning_rate": 1.9999628517643888e-05, "loss": 0.6069, "step": 137 }, { "epoch": 0.03492786636294609, "grad_norm": 0.1755145788192749, "learning_rate": 1.999960759323445e-05, "loss": 0.6094, "step": 138 }, { "epoch": 0.035180966843837, "grad_norm": 0.17850720882415771, "learning_rate": 1.9999586095568714e-05, "loss": 0.6176, "step": 139 }, { "epoch": 0.03543406732472792, "grad_norm": 0.1870347112417221, "learning_rate": 1.999956402464792e-05, "loss": 0.648, "step": 140 }, { "epoch": 0.03568716780561883, "grad_norm": 0.17585211992263794, "learning_rate": 1.999954138047333e-05, "loss": 0.5954, "step": 141 }, { "epoch": 0.035940268286509744, "grad_norm": 0.17679710686206818, "learning_rate": 1.9999518163046246e-05, "loss": 0.5972, "step": 142 }, { "epoch": 0.03619336876740066, "grad_norm": 0.18154670298099518, "learning_rate": 1.9999494372367997e-05, "loss": 0.6638, "step": 143 }, { "epoch": 0.03644646924829157, "grad_norm": 0.1948469579219818, "learning_rate": 1.9999470008439947e-05, "loss": 0.573, "step": 144 }, { "epoch": 0.03669956972918249, "grad_norm": 0.17780163884162903, "learning_rate": 1.999944507126349e-05, "loss": 0.6046, "step": 145 }, { "epoch": 0.036952670210073396, "grad_norm": 0.17574277520179749, "learning_rate": 1.9999419560840063e-05, "loss": 0.6046, "step": 146 }, { "epoch": 0.03720577069096431, "grad_norm": 0.16369116306304932, "learning_rate": 1.999939347717112e-05, "loss": 0.5947, "step": 147 }, { "epoch": 0.03745887117185523, "grad_norm": 0.17040003836154938, "learning_rate": 1.9999366820258165e-05, "loss": 0.6176, "step": 148 }, { "epoch": 0.03771197165274614, "grad_norm": 0.1718555986881256, "learning_rate": 1.9999339590102718e-05, "loss": 0.6008, "step": 149 }, { "epoch": 0.037965072133637055, "grad_norm": 0.17747220396995544, "learning_rate": 1.9999311786706343e-05, "loss": 0.5756, "step": 150 }, { "epoch": 0.038218172614527965, "grad_norm": 0.17003098130226135, "learning_rate": 1.9999283410070632e-05, "loss": 0.6252, "step": 151 }, { "epoch": 0.03847127309541888, "grad_norm": 0.18186484277248383, "learning_rate": 1.999925446019722e-05, "loss": 0.5814, "step": 152 }, { "epoch": 0.0387243735763098, "grad_norm": 0.17330726981163025, "learning_rate": 1.9999224937087754e-05, "loss": 0.6205, "step": 153 }, { "epoch": 0.03897747405720071, "grad_norm": 0.1642872840166092, "learning_rate": 1.9999194840743938e-05, "loss": 0.6079, "step": 154 }, { "epoch": 0.039230574538091624, "grad_norm": 0.1646062582731247, "learning_rate": 1.999916417116749e-05, "loss": 0.6499, "step": 155 }, { "epoch": 0.039483675018982534, "grad_norm": 0.16851483285427094, "learning_rate": 1.9999132928360172e-05, "loss": 0.6216, "step": 156 }, { "epoch": 0.03973677549987345, "grad_norm": 0.16902515292167664, "learning_rate": 1.9999101112323774e-05, "loss": 0.6106, "step": 157 }, { "epoch": 0.03998987598076437, "grad_norm": 0.16902323067188263, "learning_rate": 1.999906872306012e-05, "loss": 0.6175, "step": 158 }, { "epoch": 0.040242976461655276, "grad_norm": 0.17191338539123535, "learning_rate": 1.9999035760571065e-05, "loss": 0.6585, "step": 159 }, { "epoch": 0.04049607694254619, "grad_norm": 0.16515901684761047, "learning_rate": 1.99990022248585e-05, "loss": 0.6281, "step": 160 }, { "epoch": 0.0407491774234371, "grad_norm": 0.18414366245269775, "learning_rate": 1.9998968115924352e-05, "loss": 0.6127, "step": 161 }, { "epoch": 0.04100227790432802, "grad_norm": 0.16314849257469177, "learning_rate": 1.999893343377057e-05, "loss": 0.5814, "step": 162 }, { "epoch": 0.04125537838521893, "grad_norm": 0.16900253295898438, "learning_rate": 1.9998898178399142e-05, "loss": 0.6057, "step": 163 }, { "epoch": 0.041508478866109845, "grad_norm": 0.1643771082162857, "learning_rate": 1.999886234981209e-05, "loss": 0.6169, "step": 164 }, { "epoch": 0.04176157934700076, "grad_norm": 0.1646137237548828, "learning_rate": 1.9998825948011476e-05, "loss": 0.6144, "step": 165 }, { "epoch": 0.04201467982789167, "grad_norm": 0.17198063433170319, "learning_rate": 1.9998788972999374e-05, "loss": 0.5821, "step": 166 }, { "epoch": 0.04226778030878259, "grad_norm": 0.15963180363178253, "learning_rate": 1.9998751424777914e-05, "loss": 0.5805, "step": 167 }, { "epoch": 0.0425208807896735, "grad_norm": 0.17139393091201782, "learning_rate": 1.9998713303349242e-05, "loss": 0.6, "step": 168 }, { "epoch": 0.042773981270564414, "grad_norm": 0.17048956453800201, "learning_rate": 1.999867460871555e-05, "loss": 0.598, "step": 169 }, { "epoch": 0.04302708175145533, "grad_norm": 0.16242019832134247, "learning_rate": 1.9998635340879046e-05, "loss": 0.6278, "step": 170 }, { "epoch": 0.04328018223234624, "grad_norm": 0.1649109423160553, "learning_rate": 1.9998595499841994e-05, "loss": 0.6153, "step": 171 }, { "epoch": 0.043533282713237156, "grad_norm": 0.1605859398841858, "learning_rate": 1.9998555085606668e-05, "loss": 0.6199, "step": 172 }, { "epoch": 0.043786383194128066, "grad_norm": 0.18250787258148193, "learning_rate": 1.9998514098175388e-05, "loss": 0.6057, "step": 173 }, { "epoch": 0.04403948367501898, "grad_norm": 0.1787337362766266, "learning_rate": 1.9998472537550505e-05, "loss": 0.5691, "step": 174 }, { "epoch": 0.0442925841559099, "grad_norm": 0.15843160450458527, "learning_rate": 1.9998430403734402e-05, "loss": 0.6294, "step": 175 }, { "epoch": 0.04454568463680081, "grad_norm": 0.16140268743038177, "learning_rate": 1.999838769672949e-05, "loss": 0.6127, "step": 176 }, { "epoch": 0.044798785117691725, "grad_norm": 0.20213818550109863, "learning_rate": 1.9998344416538225e-05, "loss": 0.6074, "step": 177 }, { "epoch": 0.045051885598582635, "grad_norm": 0.17139162123203278, "learning_rate": 1.999830056316308e-05, "loss": 0.6083, "step": 178 }, { "epoch": 0.04530498607947355, "grad_norm": 0.18342524766921997, "learning_rate": 1.999825613660657e-05, "loss": 0.6143, "step": 179 }, { "epoch": 0.04555808656036447, "grad_norm": 0.15636856853961945, "learning_rate": 1.9998211136871252e-05, "loss": 0.6055, "step": 180 }, { "epoch": 0.04581118704125538, "grad_norm": 0.19432251155376434, "learning_rate": 1.999816556395969e-05, "loss": 0.6049, "step": 181 }, { "epoch": 0.046064287522146294, "grad_norm": 0.18179355561733246, "learning_rate": 1.999811941787451e-05, "loss": 0.6087, "step": 182 }, { "epoch": 0.0463173880030372, "grad_norm": 0.16457490622997284, "learning_rate": 1.999807269861835e-05, "loss": 0.6325, "step": 183 }, { "epoch": 0.04657048848392812, "grad_norm": 0.17942102253437042, "learning_rate": 1.999802540619389e-05, "loss": 0.5882, "step": 184 }, { "epoch": 0.046823588964819036, "grad_norm": 0.1658487170934677, "learning_rate": 1.9997977540603845e-05, "loss": 0.602, "step": 185 }, { "epoch": 0.047076689445709946, "grad_norm": 0.16457369923591614, "learning_rate": 1.999792910185095e-05, "loss": 0.5894, "step": 186 }, { "epoch": 0.04732978992660086, "grad_norm": 0.1655249446630478, "learning_rate": 1.9997880089937995e-05, "loss": 0.5942, "step": 187 }, { "epoch": 0.04758289040749177, "grad_norm": 0.1669139415025711, "learning_rate": 1.9997830504867777e-05, "loss": 0.606, "step": 188 }, { "epoch": 0.04783599088838269, "grad_norm": 0.17530295252799988, "learning_rate": 1.9997780346643147e-05, "loss": 0.616, "step": 189 }, { "epoch": 0.0480890913692736, "grad_norm": 0.16950328648090363, "learning_rate": 1.9997729615266975e-05, "loss": 0.614, "step": 190 }, { "epoch": 0.048342191850164515, "grad_norm": 0.16296598315238953, "learning_rate": 1.999767831074217e-05, "loss": 0.6147, "step": 191 }, { "epoch": 0.04859529233105543, "grad_norm": 0.15502794086933136, "learning_rate": 1.999762643307168e-05, "loss": 0.5812, "step": 192 }, { "epoch": 0.04884839281194634, "grad_norm": 0.16831602156162262, "learning_rate": 1.999757398225847e-05, "loss": 0.5909, "step": 193 }, { "epoch": 0.04910149329283726, "grad_norm": 0.1634550243616104, "learning_rate": 1.9997520958305556e-05, "loss": 0.5875, "step": 194 }, { "epoch": 0.04935459377372817, "grad_norm": 0.17237253487110138, "learning_rate": 1.9997467361215966e-05, "loss": 0.632, "step": 195 }, { "epoch": 0.049607694254619084, "grad_norm": 0.16299310326576233, "learning_rate": 1.9997413190992785e-05, "loss": 0.6021, "step": 196 }, { "epoch": 0.04986079473551, "grad_norm": 0.16284602880477905, "learning_rate": 1.999735844763911e-05, "loss": 0.591, "step": 197 }, { "epoch": 0.05011389521640091, "grad_norm": 0.16349320113658905, "learning_rate": 1.9997303131158082e-05, "loss": 0.5971, "step": 198 }, { "epoch": 0.050366995697291826, "grad_norm": 0.1635074019432068, "learning_rate": 1.9997247241552872e-05, "loss": 0.6136, "step": 199 }, { "epoch": 0.050620096178182736, "grad_norm": 0.16308331489562988, "learning_rate": 1.9997190778826685e-05, "loss": 0.5461, "step": 200 }, { "epoch": 0.05087319665907365, "grad_norm": 0.1602434664964676, "learning_rate": 1.9997133742982755e-05, "loss": 0.6125, "step": 201 }, { "epoch": 0.05112629713996457, "grad_norm": 0.16787537932395935, "learning_rate": 1.9997076134024356e-05, "loss": 0.6193, "step": 202 }, { "epoch": 0.05137939762085548, "grad_norm": 0.16162943840026855, "learning_rate": 1.9997017951954788e-05, "loss": 0.6082, "step": 203 }, { "epoch": 0.051632498101746395, "grad_norm": 0.17052938044071198, "learning_rate": 1.9996959196777388e-05, "loss": 0.5929, "step": 204 }, { "epoch": 0.051885598582637305, "grad_norm": 0.17506612837314606, "learning_rate": 1.9996899868495524e-05, "loss": 0.618, "step": 205 }, { "epoch": 0.05213869906352822, "grad_norm": 0.19307924807071686, "learning_rate": 1.9996839967112595e-05, "loss": 0.5826, "step": 206 }, { "epoch": 0.05239179954441914, "grad_norm": 0.16263261437416077, "learning_rate": 1.9996779492632035e-05, "loss": 0.5959, "step": 207 }, { "epoch": 0.05264490002531005, "grad_norm": 0.2174728810787201, "learning_rate": 1.999671844505731e-05, "loss": 0.5998, "step": 208 }, { "epoch": 0.052898000506200964, "grad_norm": 0.1637093722820282, "learning_rate": 1.9996656824391927e-05, "loss": 0.5784, "step": 209 }, { "epoch": 0.05315110098709187, "grad_norm": 0.17159394919872284, "learning_rate": 1.9996594630639415e-05, "loss": 0.5975, "step": 210 }, { "epoch": 0.05340420146798279, "grad_norm": 0.16354690492153168, "learning_rate": 1.9996531863803334e-05, "loss": 0.61, "step": 211 }, { "epoch": 0.053657301948873706, "grad_norm": 0.16591696441173553, "learning_rate": 1.9996468523887286e-05, "loss": 0.604, "step": 212 }, { "epoch": 0.053910402429764616, "grad_norm": 0.16789790987968445, "learning_rate": 1.9996404610894905e-05, "loss": 0.5941, "step": 213 }, { "epoch": 0.05416350291065553, "grad_norm": 0.1629846841096878, "learning_rate": 1.999634012482985e-05, "loss": 0.5854, "step": 214 }, { "epoch": 0.05441660339154644, "grad_norm": 0.17834800481796265, "learning_rate": 1.9996275065695823e-05, "loss": 0.6057, "step": 215 }, { "epoch": 0.05466970387243736, "grad_norm": 0.16474172472953796, "learning_rate": 1.9996209433496546e-05, "loss": 0.6017, "step": 216 }, { "epoch": 0.05492280435332827, "grad_norm": 0.15831775963306427, "learning_rate": 1.9996143228235793e-05, "loss": 0.6176, "step": 217 }, { "epoch": 0.055175904834219185, "grad_norm": 0.1640530377626419, "learning_rate": 1.999607644991735e-05, "loss": 0.6052, "step": 218 }, { "epoch": 0.0554290053151101, "grad_norm": 0.1691807508468628, "learning_rate": 1.9996009098545047e-05, "loss": 0.6023, "step": 219 }, { "epoch": 0.05568210579600101, "grad_norm": 0.17375995218753815, "learning_rate": 1.999594117412275e-05, "loss": 0.588, "step": 220 }, { "epoch": 0.05593520627689193, "grad_norm": 0.1587343066930771, "learning_rate": 1.9995872676654346e-05, "loss": 0.5992, "step": 221 }, { "epoch": 0.05618830675778284, "grad_norm": 0.16630913317203522, "learning_rate": 1.9995803606143768e-05, "loss": 0.613, "step": 222 }, { "epoch": 0.05644140723867375, "grad_norm": 0.15650874376296997, "learning_rate": 1.9995733962594966e-05, "loss": 0.5881, "step": 223 }, { "epoch": 0.05669450771956467, "grad_norm": 0.17238104343414307, "learning_rate": 1.9995663746011947e-05, "loss": 0.5898, "step": 224 }, { "epoch": 0.05694760820045558, "grad_norm": 0.1555783599615097, "learning_rate": 1.9995592956398725e-05, "loss": 0.5927, "step": 225 }, { "epoch": 0.057200708681346496, "grad_norm": 0.16336458921432495, "learning_rate": 1.9995521593759365e-05, "loss": 0.6055, "step": 226 }, { "epoch": 0.057453809162237406, "grad_norm": 0.17016665637493134, "learning_rate": 1.999544965809795e-05, "loss": 0.6168, "step": 227 }, { "epoch": 0.05770690964312832, "grad_norm": 0.1600721925497055, "learning_rate": 1.9995377149418613e-05, "loss": 0.5961, "step": 228 }, { "epoch": 0.05796001012401924, "grad_norm": 0.18717394769191742, "learning_rate": 1.9995304067725504e-05, "loss": 0.5645, "step": 229 }, { "epoch": 0.05821311060491015, "grad_norm": 0.1661251187324524, "learning_rate": 1.9995230413022816e-05, "loss": 0.5786, "step": 230 }, { "epoch": 0.058466211085801065, "grad_norm": 0.1633238047361374, "learning_rate": 1.9995156185314774e-05, "loss": 0.6195, "step": 231 }, { "epoch": 0.058719311566691974, "grad_norm": 0.16088761389255524, "learning_rate": 1.999508138460563e-05, "loss": 0.5744, "step": 232 }, { "epoch": 0.05897241204758289, "grad_norm": 0.17186705768108368, "learning_rate": 1.9995006010899668e-05, "loss": 0.6101, "step": 233 }, { "epoch": 0.05922551252847381, "grad_norm": 0.16114409267902374, "learning_rate": 1.9994930064201214e-05, "loss": 0.5771, "step": 234 }, { "epoch": 0.05947861300936472, "grad_norm": 0.15168946981430054, "learning_rate": 1.999485354451462e-05, "loss": 0.5918, "step": 235 }, { "epoch": 0.059731713490255633, "grad_norm": 0.17712467908859253, "learning_rate": 1.999477645184428e-05, "loss": 0.5937, "step": 236 }, { "epoch": 0.05998481397114654, "grad_norm": 0.16912341117858887, "learning_rate": 1.99946987861946e-05, "loss": 0.5937, "step": 237 }, { "epoch": 0.06023791445203746, "grad_norm": 0.1597040891647339, "learning_rate": 1.9994620547570044e-05, "loss": 0.5889, "step": 238 }, { "epoch": 0.06049101493292837, "grad_norm": 0.16447393596172333, "learning_rate": 1.999454173597509e-05, "loss": 0.6258, "step": 239 }, { "epoch": 0.060744115413819286, "grad_norm": 0.166543111205101, "learning_rate": 1.9994462351414264e-05, "loss": 0.594, "step": 240 }, { "epoch": 0.0609972158947102, "grad_norm": 0.16105465590953827, "learning_rate": 1.999438239389211e-05, "loss": 0.5903, "step": 241 }, { "epoch": 0.06125031637560111, "grad_norm": 0.15936554968357086, "learning_rate": 1.999430186341321e-05, "loss": 0.5915, "step": 242 }, { "epoch": 0.06150341685649203, "grad_norm": 0.15656346082687378, "learning_rate": 1.999422075998219e-05, "loss": 0.5699, "step": 243 }, { "epoch": 0.06175651733738294, "grad_norm": 0.1559823900461197, "learning_rate": 1.999413908360369e-05, "loss": 0.6093, "step": 244 }, { "epoch": 0.062009617818273854, "grad_norm": 0.16356448829174042, "learning_rate": 1.99940568342824e-05, "loss": 0.6096, "step": 245 }, { "epoch": 0.06226271829916477, "grad_norm": 0.167751282453537, "learning_rate": 1.9993974012023027e-05, "loss": 0.5826, "step": 246 }, { "epoch": 0.06251581878005569, "grad_norm": 0.15648062527179718, "learning_rate": 1.9993890616830325e-05, "loss": 0.5984, "step": 247 }, { "epoch": 0.06276891926094659, "grad_norm": 0.1637192964553833, "learning_rate": 1.9993806648709074e-05, "loss": 0.6117, "step": 248 }, { "epoch": 0.0630220197418375, "grad_norm": 0.17687813937664032, "learning_rate": 1.999372210766409e-05, "loss": 0.6144, "step": 249 }, { "epoch": 0.06327512022272842, "grad_norm": 0.15885214507579803, "learning_rate": 1.9993636993700215e-05, "loss": 0.5917, "step": 250 }, { "epoch": 0.06352822070361934, "grad_norm": 0.15977385640144348, "learning_rate": 1.9993551306822327e-05, "loss": 0.6064, "step": 251 }, { "epoch": 0.06378132118451026, "grad_norm": 0.169663205742836, "learning_rate": 1.999346504703534e-05, "loss": 0.6055, "step": 252 }, { "epoch": 0.06403442166540116, "grad_norm": 0.17267578840255737, "learning_rate": 1.999337821434421e-05, "loss": 0.6009, "step": 253 }, { "epoch": 0.06428752214629208, "grad_norm": 0.1592559516429901, "learning_rate": 1.9993290808753895e-05, "loss": 0.5861, "step": 254 }, { "epoch": 0.06454062262718299, "grad_norm": 0.15290877223014832, "learning_rate": 1.999320283026942e-05, "loss": 0.5827, "step": 255 }, { "epoch": 0.06479372310807391, "grad_norm": 0.15359878540039062, "learning_rate": 1.9993114278895825e-05, "loss": 0.602, "step": 256 }, { "epoch": 0.06504682358896482, "grad_norm": 0.15598230063915253, "learning_rate": 1.999302515463819e-05, "loss": 0.6146, "step": 257 }, { "epoch": 0.06529992406985573, "grad_norm": 0.15773145854473114, "learning_rate": 1.9992935457501613e-05, "loss": 0.5922, "step": 258 }, { "epoch": 0.06555302455074664, "grad_norm": 0.15868717432022095, "learning_rate": 1.999284518749125e-05, "loss": 0.6124, "step": 259 }, { "epoch": 0.06580612503163756, "grad_norm": 0.15584896504878998, "learning_rate": 1.9992754344612265e-05, "loss": 0.5999, "step": 260 }, { "epoch": 0.06605922551252848, "grad_norm": 0.15427203476428986, "learning_rate": 1.9992662928869874e-05, "loss": 0.5952, "step": 261 }, { "epoch": 0.0663123259934194, "grad_norm": 0.15341795980930328, "learning_rate": 1.9992570940269313e-05, "loss": 0.587, "step": 262 }, { "epoch": 0.0665654264743103, "grad_norm": 0.16140134632587433, "learning_rate": 1.9992478378815857e-05, "loss": 0.5778, "step": 263 }, { "epoch": 0.06681852695520121, "grad_norm": 0.14912493526935577, "learning_rate": 1.999238524451481e-05, "loss": 0.5857, "step": 264 }, { "epoch": 0.06707162743609213, "grad_norm": 0.17488797008991241, "learning_rate": 1.999229153737152e-05, "loss": 0.5844, "step": 265 }, { "epoch": 0.06732472791698305, "grad_norm": 0.15687042474746704, "learning_rate": 1.9992197257391344e-05, "loss": 0.6298, "step": 266 }, { "epoch": 0.06757782839787396, "grad_norm": 0.16537171602249146, "learning_rate": 1.9992102404579697e-05, "loss": 0.6044, "step": 267 }, { "epoch": 0.06783092887876487, "grad_norm": 0.1586325317621231, "learning_rate": 1.999200697894202e-05, "loss": 0.6134, "step": 268 }, { "epoch": 0.06808402935965578, "grad_norm": 0.281053751707077, "learning_rate": 1.9991910980483772e-05, "loss": 0.5833, "step": 269 }, { "epoch": 0.0683371298405467, "grad_norm": 0.16128815710544586, "learning_rate": 1.9991814409210465e-05, "loss": 0.6059, "step": 270 }, { "epoch": 0.06859023032143761, "grad_norm": 0.16216106712818146, "learning_rate": 1.999171726512763e-05, "loss": 0.6201, "step": 271 }, { "epoch": 0.06884333080232853, "grad_norm": 0.16704806685447693, "learning_rate": 1.9991619548240844e-05, "loss": 0.593, "step": 272 }, { "epoch": 0.06909643128321943, "grad_norm": 0.1574796736240387, "learning_rate": 1.9991521258555703e-05, "loss": 0.5881, "step": 273 }, { "epoch": 0.06934953176411035, "grad_norm": 0.1586303412914276, "learning_rate": 1.999142239607784e-05, "loss": 0.591, "step": 274 }, { "epoch": 0.06960263224500127, "grad_norm": 0.16143415868282318, "learning_rate": 1.9991322960812928e-05, "loss": 0.5879, "step": 275 }, { "epoch": 0.06985573272589218, "grad_norm": 0.1673780381679535, "learning_rate": 1.9991222952766663e-05, "loss": 0.6059, "step": 276 }, { "epoch": 0.07010883320678309, "grad_norm": 0.16469413042068481, "learning_rate": 1.9991122371944784e-05, "loss": 0.5997, "step": 277 }, { "epoch": 0.070361933687674, "grad_norm": 0.16841571033000946, "learning_rate": 1.999102121835305e-05, "loss": 0.6089, "step": 278 }, { "epoch": 0.07061503416856492, "grad_norm": 0.16411472856998444, "learning_rate": 1.9990919491997262e-05, "loss": 0.6135, "step": 279 }, { "epoch": 0.07086813464945584, "grad_norm": 0.15883317589759827, "learning_rate": 1.999081719288325e-05, "loss": 0.6049, "step": 280 }, { "epoch": 0.07112123513034675, "grad_norm": 0.16443659365177155, "learning_rate": 1.9990714321016888e-05, "loss": 0.6231, "step": 281 }, { "epoch": 0.07137433561123765, "grad_norm": 0.16413702070713043, "learning_rate": 1.999061087640406e-05, "loss": 0.6017, "step": 282 }, { "epoch": 0.07162743609212857, "grad_norm": 0.16081872582435608, "learning_rate": 1.9990506859050706e-05, "loss": 0.5843, "step": 283 }, { "epoch": 0.07188053657301949, "grad_norm": 0.15878289937973022, "learning_rate": 1.9990402268962786e-05, "loss": 0.5691, "step": 284 }, { "epoch": 0.0721336370539104, "grad_norm": 0.15292367339134216, "learning_rate": 1.99902971061463e-05, "loss": 0.5998, "step": 285 }, { "epoch": 0.07238673753480132, "grad_norm": 0.15839214622974396, "learning_rate": 1.9990191370607268e-05, "loss": 0.5947, "step": 286 }, { "epoch": 0.07263983801569222, "grad_norm": 0.15390464663505554, "learning_rate": 1.9990085062351755e-05, "loss": 0.6064, "step": 287 }, { "epoch": 0.07289293849658314, "grad_norm": 0.1568797379732132, "learning_rate": 1.998997818138586e-05, "loss": 0.5988, "step": 288 }, { "epoch": 0.07314603897747406, "grad_norm": 0.15333013236522675, "learning_rate": 1.9989870727715706e-05, "loss": 0.5986, "step": 289 }, { "epoch": 0.07339913945836497, "grad_norm": 0.15738581120967865, "learning_rate": 1.9989762701347457e-05, "loss": 0.6029, "step": 290 }, { "epoch": 0.07365223993925589, "grad_norm": 0.15028329193592072, "learning_rate": 1.99896541022873e-05, "loss": 0.5913, "step": 291 }, { "epoch": 0.07390534042014679, "grad_norm": 0.15606540441513062, "learning_rate": 1.9989544930541464e-05, "loss": 0.5727, "step": 292 }, { "epoch": 0.07415844090103771, "grad_norm": 0.16559909284114838, "learning_rate": 1.9989435186116206e-05, "loss": 0.6129, "step": 293 }, { "epoch": 0.07441154138192863, "grad_norm": 0.16808965802192688, "learning_rate": 1.998932486901782e-05, "loss": 0.593, "step": 294 }, { "epoch": 0.07466464186281954, "grad_norm": 0.154685840010643, "learning_rate": 1.9989213979252633e-05, "loss": 0.6267, "step": 295 }, { "epoch": 0.07491774234371046, "grad_norm": 0.15596535801887512, "learning_rate": 1.9989102516826992e-05, "loss": 0.5819, "step": 296 }, { "epoch": 0.07517084282460136, "grad_norm": 0.16899852454662323, "learning_rate": 1.9988990481747296e-05, "loss": 0.6208, "step": 297 }, { "epoch": 0.07542394330549228, "grad_norm": 0.15050692856311798, "learning_rate": 1.9988877874019964e-05, "loss": 0.5892, "step": 298 }, { "epoch": 0.0756770437863832, "grad_norm": 0.1622265726327896, "learning_rate": 1.9988764693651454e-05, "loss": 0.5773, "step": 299 }, { "epoch": 0.07593014426727411, "grad_norm": 0.18525199592113495, "learning_rate": 1.9988650940648252e-05, "loss": 0.6063, "step": 300 }, { "epoch": 0.07618324474816503, "grad_norm": 0.17563289403915405, "learning_rate": 1.998853661501688e-05, "loss": 0.5942, "step": 301 }, { "epoch": 0.07643634522905593, "grad_norm": 0.1748930960893631, "learning_rate": 1.9988421716763892e-05, "loss": 0.5884, "step": 302 }, { "epoch": 0.07668944570994685, "grad_norm": 0.15376964211463928, "learning_rate": 1.9988306245895873e-05, "loss": 0.5922, "step": 303 }, { "epoch": 0.07694254619083776, "grad_norm": 0.16042698919773102, "learning_rate": 1.9988190202419443e-05, "loss": 0.5657, "step": 304 }, { "epoch": 0.07719564667172868, "grad_norm": 0.16305531561374664, "learning_rate": 1.998807358634126e-05, "loss": 0.5933, "step": 305 }, { "epoch": 0.0774487471526196, "grad_norm": 0.15527382493019104, "learning_rate": 1.9987956397668005e-05, "loss": 0.5753, "step": 306 }, { "epoch": 0.0777018476335105, "grad_norm": 0.1665937453508377, "learning_rate": 1.9987838636406397e-05, "loss": 0.5871, "step": 307 }, { "epoch": 0.07795494811440142, "grad_norm": 0.15807491540908813, "learning_rate": 1.9987720302563184e-05, "loss": 0.5953, "step": 308 }, { "epoch": 0.07820804859529233, "grad_norm": 0.16152141988277435, "learning_rate": 1.9987601396145154e-05, "loss": 0.593, "step": 309 }, { "epoch": 0.07846114907618325, "grad_norm": 0.16260159015655518, "learning_rate": 1.9987481917159123e-05, "loss": 0.6233, "step": 310 }, { "epoch": 0.07871424955707416, "grad_norm": 0.15751411020755768, "learning_rate": 1.998736186561194e-05, "loss": 0.5676, "step": 311 }, { "epoch": 0.07896735003796507, "grad_norm": 0.15959788858890533, "learning_rate": 1.9987241241510485e-05, "loss": 0.6108, "step": 312 }, { "epoch": 0.07922045051885598, "grad_norm": 0.15218234062194824, "learning_rate": 1.9987120044861676e-05, "loss": 0.5774, "step": 313 }, { "epoch": 0.0794735509997469, "grad_norm": 0.15036055445671082, "learning_rate": 1.9986998275672458e-05, "loss": 0.5523, "step": 314 }, { "epoch": 0.07972665148063782, "grad_norm": 0.16426804661750793, "learning_rate": 1.9986875933949815e-05, "loss": 0.5858, "step": 315 }, { "epoch": 0.07997975196152873, "grad_norm": 0.15715593099594116, "learning_rate": 1.9986753019700758e-05, "loss": 0.6016, "step": 316 }, { "epoch": 0.08023285244241964, "grad_norm": 0.17541229724884033, "learning_rate": 1.998662953293234e-05, "loss": 0.5837, "step": 317 }, { "epoch": 0.08048595292331055, "grad_norm": 0.1641518473625183, "learning_rate": 1.9986505473651628e-05, "loss": 0.6009, "step": 318 }, { "epoch": 0.08073905340420147, "grad_norm": 0.15388673543930054, "learning_rate": 1.9986380841865746e-05, "loss": 0.5843, "step": 319 }, { "epoch": 0.08099215388509239, "grad_norm": 0.17019514739513397, "learning_rate": 1.998625563758183e-05, "loss": 0.5964, "step": 320 }, { "epoch": 0.0812452543659833, "grad_norm": 0.15808385610580444, "learning_rate": 1.9986129860807063e-05, "loss": 0.5857, "step": 321 }, { "epoch": 0.0814983548468742, "grad_norm": 0.16654977202415466, "learning_rate": 1.9986003511548655e-05, "loss": 0.5848, "step": 322 }, { "epoch": 0.08175145532776512, "grad_norm": 0.16539111733436584, "learning_rate": 1.9985876589813848e-05, "loss": 0.5946, "step": 323 }, { "epoch": 0.08200455580865604, "grad_norm": 0.15268370509147644, "learning_rate": 1.998574909560992e-05, "loss": 0.5755, "step": 324 }, { "epoch": 0.08225765628954695, "grad_norm": 0.15759532153606415, "learning_rate": 1.9985621028944174e-05, "loss": 0.576, "step": 325 }, { "epoch": 0.08251075677043786, "grad_norm": 0.189273402094841, "learning_rate": 1.9985492389823958e-05, "loss": 0.5808, "step": 326 }, { "epoch": 0.08276385725132877, "grad_norm": 0.1537943333387375, "learning_rate": 1.998536317825665e-05, "loss": 0.599, "step": 327 }, { "epoch": 0.08301695773221969, "grad_norm": 0.16000860929489136, "learning_rate": 1.998523339424965e-05, "loss": 0.592, "step": 328 }, { "epoch": 0.0832700582131106, "grad_norm": 0.1547561138868332, "learning_rate": 1.9985103037810396e-05, "loss": 0.5943, "step": 329 }, { "epoch": 0.08352315869400152, "grad_norm": 0.1555517166852951, "learning_rate": 1.998497210894637e-05, "loss": 0.5634, "step": 330 }, { "epoch": 0.08377625917489243, "grad_norm": 0.1513368785381317, "learning_rate": 1.9984840607665073e-05, "loss": 0.5469, "step": 331 }, { "epoch": 0.08402935965578334, "grad_norm": 0.15102490782737732, "learning_rate": 1.9984708533974043e-05, "loss": 0.5763, "step": 332 }, { "epoch": 0.08428246013667426, "grad_norm": 0.1598501354455948, "learning_rate": 1.9984575887880854e-05, "loss": 0.5852, "step": 333 }, { "epoch": 0.08453556061756518, "grad_norm": 0.17087587714195251, "learning_rate": 1.998444266939311e-05, "loss": 0.5749, "step": 334 }, { "epoch": 0.08478866109845609, "grad_norm": 0.17225144803524017, "learning_rate": 1.9984308878518446e-05, "loss": 0.5848, "step": 335 }, { "epoch": 0.085041761579347, "grad_norm": 0.1622695028781891, "learning_rate": 1.998417451526453e-05, "loss": 0.5964, "step": 336 }, { "epoch": 0.08529486206023791, "grad_norm": 0.1584172397851944, "learning_rate": 1.9984039579639073e-05, "loss": 0.6052, "step": 337 }, { "epoch": 0.08554796254112883, "grad_norm": 0.1658150851726532, "learning_rate": 1.9983904071649803e-05, "loss": 0.6025, "step": 338 }, { "epoch": 0.08580106302201974, "grad_norm": 0.1496206670999527, "learning_rate": 1.998376799130449e-05, "loss": 0.5741, "step": 339 }, { "epoch": 0.08605416350291066, "grad_norm": 0.15472014248371124, "learning_rate": 1.998363133861094e-05, "loss": 0.5833, "step": 340 }, { "epoch": 0.08630726398380156, "grad_norm": 0.1652199923992157, "learning_rate": 1.998349411357698e-05, "loss": 0.6036, "step": 341 }, { "epoch": 0.08656036446469248, "grad_norm": 0.15922397375106812, "learning_rate": 1.998335631621048e-05, "loss": 0.5952, "step": 342 }, { "epoch": 0.0868134649455834, "grad_norm": 0.1518596112728119, "learning_rate": 1.998321794651934e-05, "loss": 0.5826, "step": 343 }, { "epoch": 0.08706656542647431, "grad_norm": 0.16738763451576233, "learning_rate": 1.9983079004511488e-05, "loss": 0.6016, "step": 344 }, { "epoch": 0.08731966590736523, "grad_norm": 0.15630985796451569, "learning_rate": 1.9982939490194902e-05, "loss": 0.6241, "step": 345 }, { "epoch": 0.08757276638825613, "grad_norm": 0.15981461107730865, "learning_rate": 1.9982799403577564e-05, "loss": 0.5889, "step": 346 }, { "epoch": 0.08782586686914705, "grad_norm": 0.27540192008018494, "learning_rate": 1.9982658744667517e-05, "loss": 0.6059, "step": 347 }, { "epoch": 0.08807896735003796, "grad_norm": 0.15519198775291443, "learning_rate": 1.9982517513472813e-05, "loss": 0.5731, "step": 348 }, { "epoch": 0.08833206783092888, "grad_norm": 0.15499025583267212, "learning_rate": 1.998237571000156e-05, "loss": 0.5499, "step": 349 }, { "epoch": 0.0885851683118198, "grad_norm": 0.15701013803482056, "learning_rate": 1.9982233334261885e-05, "loss": 0.5907, "step": 350 }, { "epoch": 0.0888382687927107, "grad_norm": 0.15649379789829254, "learning_rate": 1.9982090386261944e-05, "loss": 0.5898, "step": 351 }, { "epoch": 0.08909136927360162, "grad_norm": 0.15530402958393097, "learning_rate": 1.9981946866009936e-05, "loss": 0.5883, "step": 352 }, { "epoch": 0.08934446975449253, "grad_norm": 0.15217220783233643, "learning_rate": 1.9981802773514087e-05, "loss": 0.5809, "step": 353 }, { "epoch": 0.08959757023538345, "grad_norm": 0.1590508371591568, "learning_rate": 1.9981658108782663e-05, "loss": 0.5807, "step": 354 }, { "epoch": 0.08985067071627437, "grad_norm": 0.15429863333702087, "learning_rate": 1.998151287182395e-05, "loss": 0.5834, "step": 355 }, { "epoch": 0.09010377119716527, "grad_norm": 0.15378670394420624, "learning_rate": 1.9981367062646277e-05, "loss": 0.6003, "step": 356 }, { "epoch": 0.09035687167805619, "grad_norm": 0.15638568997383118, "learning_rate": 1.9981220681258004e-05, "loss": 0.5928, "step": 357 }, { "epoch": 0.0906099721589471, "grad_norm": 0.16201607882976532, "learning_rate": 1.9981073727667523e-05, "loss": 0.5835, "step": 358 }, { "epoch": 0.09086307263983802, "grad_norm": 0.157405823469162, "learning_rate": 1.9980926201883254e-05, "loss": 0.5621, "step": 359 }, { "epoch": 0.09111617312072894, "grad_norm": 0.1522633582353592, "learning_rate": 1.9980778103913663e-05, "loss": 0.5531, "step": 360 }, { "epoch": 0.09136927360161984, "grad_norm": 0.16538803279399872, "learning_rate": 1.998062943376723e-05, "loss": 0.5816, "step": 361 }, { "epoch": 0.09162237408251075, "grad_norm": 0.1762588918209076, "learning_rate": 1.9980480191452487e-05, "loss": 0.5875, "step": 362 }, { "epoch": 0.09187547456340167, "grad_norm": 0.15592646598815918, "learning_rate": 1.9980330376977984e-05, "loss": 0.6372, "step": 363 }, { "epoch": 0.09212857504429259, "grad_norm": 0.1568923443555832, "learning_rate": 1.998017999035231e-05, "loss": 0.5961, "step": 364 }, { "epoch": 0.0923816755251835, "grad_norm": 0.15493977069854736, "learning_rate": 1.998002903158409e-05, "loss": 0.5596, "step": 365 }, { "epoch": 0.0926347760060744, "grad_norm": 0.15471848845481873, "learning_rate": 1.9979877500681973e-05, "loss": 0.595, "step": 366 }, { "epoch": 0.09288787648696532, "grad_norm": 0.15223823487758636, "learning_rate": 1.997972539765465e-05, "loss": 0.5923, "step": 367 }, { "epoch": 0.09314097696785624, "grad_norm": 0.154722198843956, "learning_rate": 1.997957272251084e-05, "loss": 0.5848, "step": 368 }, { "epoch": 0.09339407744874716, "grad_norm": 0.14988532662391663, "learning_rate": 1.9979419475259293e-05, "loss": 0.5879, "step": 369 }, { "epoch": 0.09364717792963807, "grad_norm": 0.15272821485996246, "learning_rate": 1.9979265655908797e-05, "loss": 0.5856, "step": 370 }, { "epoch": 0.09390027841052898, "grad_norm": 0.1536874920129776, "learning_rate": 1.9979111264468172e-05, "loss": 0.5675, "step": 371 }, { "epoch": 0.09415337889141989, "grad_norm": 0.15007144212722778, "learning_rate": 1.9978956300946265e-05, "loss": 0.5832, "step": 372 }, { "epoch": 0.09440647937231081, "grad_norm": 0.15521340072155, "learning_rate": 1.997880076535196e-05, "loss": 0.5555, "step": 373 }, { "epoch": 0.09465957985320173, "grad_norm": 0.15635685622692108, "learning_rate": 1.9978644657694174e-05, "loss": 0.5907, "step": 374 }, { "epoch": 0.09491268033409264, "grad_norm": 0.15637192130088806, "learning_rate": 1.997848797798186e-05, "loss": 0.5855, "step": 375 }, { "epoch": 0.09516578081498354, "grad_norm": 0.15218724310398102, "learning_rate": 1.9978330726223992e-05, "loss": 0.5708, "step": 376 }, { "epoch": 0.09541888129587446, "grad_norm": 0.21592919528484344, "learning_rate": 1.9978172902429595e-05, "loss": 0.5739, "step": 377 }, { "epoch": 0.09567198177676538, "grad_norm": 0.2044958621263504, "learning_rate": 1.9978014506607705e-05, "loss": 0.5678, "step": 378 }, { "epoch": 0.0959250822576563, "grad_norm": 0.1602644920349121, "learning_rate": 1.9977855538767416e-05, "loss": 0.5839, "step": 379 }, { "epoch": 0.0961781827385472, "grad_norm": 0.15126170217990875, "learning_rate": 1.9977695998917833e-05, "loss": 0.5982, "step": 380 }, { "epoch": 0.09643128321943811, "grad_norm": 0.1637371927499771, "learning_rate": 1.99775358870681e-05, "loss": 0.589, "step": 381 }, { "epoch": 0.09668438370032903, "grad_norm": 0.1484130471944809, "learning_rate": 1.9977375203227403e-05, "loss": 0.5574, "step": 382 }, { "epoch": 0.09693748418121995, "grad_norm": 0.15195773541927338, "learning_rate": 1.997721394740495e-05, "loss": 0.5996, "step": 383 }, { "epoch": 0.09719058466211086, "grad_norm": 0.18043164908885956, "learning_rate": 1.9977052119609984e-05, "loss": 0.5906, "step": 384 }, { "epoch": 0.09744368514300177, "grad_norm": 0.15861275792121887, "learning_rate": 1.9976889719851785e-05, "loss": 0.579, "step": 385 }, { "epoch": 0.09769678562389268, "grad_norm": 0.14669226109981537, "learning_rate": 1.9976726748139658e-05, "loss": 0.5891, "step": 386 }, { "epoch": 0.0979498861047836, "grad_norm": 0.15282531082630157, "learning_rate": 1.9976563204482952e-05, "loss": 0.586, "step": 387 }, { "epoch": 0.09820298658567451, "grad_norm": 0.15087860822677612, "learning_rate": 1.997639908889104e-05, "loss": 0.5916, "step": 388 }, { "epoch": 0.09845608706656543, "grad_norm": 0.15215586125850677, "learning_rate": 1.9976234401373335e-05, "loss": 0.6154, "step": 389 }, { "epoch": 0.09870918754745633, "grad_norm": 0.15243254601955414, "learning_rate": 1.9976069141939268e-05, "loss": 0.5911, "step": 390 }, { "epoch": 0.09896228802834725, "grad_norm": 0.1560385674238205, "learning_rate": 1.9975903310598323e-05, "loss": 0.5991, "step": 391 }, { "epoch": 0.09921538850923817, "grad_norm": 0.15781332552433014, "learning_rate": 1.9975736907359997e-05, "loss": 0.5921, "step": 392 }, { "epoch": 0.09946848899012908, "grad_norm": 0.16044297814369202, "learning_rate": 1.997556993223384e-05, "loss": 0.5954, "step": 393 }, { "epoch": 0.09972158947102, "grad_norm": 0.14789925515651703, "learning_rate": 1.997540238522942e-05, "loss": 0.5788, "step": 394 }, { "epoch": 0.0999746899519109, "grad_norm": 0.1528216302394867, "learning_rate": 1.997523426635634e-05, "loss": 0.5963, "step": 395 }, { "epoch": 0.10022779043280182, "grad_norm": 0.15586043894290924, "learning_rate": 1.9975065575624237e-05, "loss": 0.5847, "step": 396 }, { "epoch": 0.10048089091369274, "grad_norm": 0.14834704995155334, "learning_rate": 1.9974896313042784e-05, "loss": 0.5776, "step": 397 }, { "epoch": 0.10073399139458365, "grad_norm": 0.1561892032623291, "learning_rate": 1.9974726478621688e-05, "loss": 0.5692, "step": 398 }, { "epoch": 0.10098709187547457, "grad_norm": 0.1573437750339508, "learning_rate": 1.9974556072370678e-05, "loss": 0.6105, "step": 399 }, { "epoch": 0.10124019235636547, "grad_norm": 0.15625901520252228, "learning_rate": 1.997438509429953e-05, "loss": 0.5999, "step": 400 }, { "epoch": 0.10149329283725639, "grad_norm": 0.15824973583221436, "learning_rate": 1.997421354441804e-05, "loss": 0.6109, "step": 401 }, { "epoch": 0.1017463933181473, "grad_norm": 0.1485176682472229, "learning_rate": 1.997404142273605e-05, "loss": 0.6122, "step": 402 }, { "epoch": 0.10199949379903822, "grad_norm": 0.1522447168827057, "learning_rate": 1.997386872926342e-05, "loss": 0.5805, "step": 403 }, { "epoch": 0.10225259427992914, "grad_norm": 0.14381229877471924, "learning_rate": 1.997369546401005e-05, "loss": 0.5514, "step": 404 }, { "epoch": 0.10250569476082004, "grad_norm": 0.15505284070968628, "learning_rate": 1.997352162698588e-05, "loss": 0.5728, "step": 405 }, { "epoch": 0.10275879524171096, "grad_norm": 0.16043666005134583, "learning_rate": 1.9973347218200867e-05, "loss": 0.579, "step": 406 }, { "epoch": 0.10301189572260187, "grad_norm": 0.15423263609409332, "learning_rate": 1.9973172237665014e-05, "loss": 0.571, "step": 407 }, { "epoch": 0.10326499620349279, "grad_norm": 0.18292605876922607, "learning_rate": 1.9972996685388353e-05, "loss": 0.5797, "step": 408 }, { "epoch": 0.1035180966843837, "grad_norm": 0.1477111428976059, "learning_rate": 1.997282056138095e-05, "loss": 0.5803, "step": 409 }, { "epoch": 0.10377119716527461, "grad_norm": 0.1475038230419159, "learning_rate": 1.99726438656529e-05, "loss": 0.566, "step": 410 }, { "epoch": 0.10402429764616553, "grad_norm": 0.15047602355480194, "learning_rate": 1.9972466598214328e-05, "loss": 0.5598, "step": 411 }, { "epoch": 0.10427739812705644, "grad_norm": 0.17586417496204376, "learning_rate": 1.9972288759075402e-05, "loss": 0.5897, "step": 412 }, { "epoch": 0.10453049860794736, "grad_norm": 0.19568774104118347, "learning_rate": 1.9972110348246313e-05, "loss": 0.5697, "step": 413 }, { "epoch": 0.10478359908883828, "grad_norm": 0.15748457610607147, "learning_rate": 1.9971931365737293e-05, "loss": 0.601, "step": 414 }, { "epoch": 0.10503669956972918, "grad_norm": 0.1497504711151123, "learning_rate": 1.9971751811558598e-05, "loss": 0.5851, "step": 415 }, { "epoch": 0.1052898000506201, "grad_norm": 0.1707945168018341, "learning_rate": 1.9971571685720524e-05, "loss": 0.606, "step": 416 }, { "epoch": 0.10554290053151101, "grad_norm": 0.15110230445861816, "learning_rate": 1.99713909882334e-05, "loss": 0.5805, "step": 417 }, { "epoch": 0.10579600101240193, "grad_norm": 0.1568954437971115, "learning_rate": 1.9971209719107585e-05, "loss": 0.5896, "step": 418 }, { "epoch": 0.10604910149329284, "grad_norm": 0.15485233068466187, "learning_rate": 1.9971027878353464e-05, "loss": 0.5706, "step": 419 }, { "epoch": 0.10630220197418375, "grad_norm": 0.15614329278469086, "learning_rate": 1.9970845465981466e-05, "loss": 0.598, "step": 420 }, { "epoch": 0.10655530245507466, "grad_norm": 0.15889252722263336, "learning_rate": 1.9970662482002047e-05, "loss": 0.5861, "step": 421 }, { "epoch": 0.10680840293596558, "grad_norm": 0.15649782121181488, "learning_rate": 1.99704789264257e-05, "loss": 0.5865, "step": 422 }, { "epoch": 0.1070615034168565, "grad_norm": 0.15167191624641418, "learning_rate": 1.9970294799262946e-05, "loss": 0.5862, "step": 423 }, { "epoch": 0.10731460389774741, "grad_norm": 0.15284578502178192, "learning_rate": 1.9970110100524343e-05, "loss": 0.5757, "step": 424 }, { "epoch": 0.10756770437863832, "grad_norm": 0.1515786200761795, "learning_rate": 1.996992483022047e-05, "loss": 0.5718, "step": 425 }, { "epoch": 0.10782080485952923, "grad_norm": 0.16168825328350067, "learning_rate": 1.9969738988361963e-05, "loss": 0.5758, "step": 426 }, { "epoch": 0.10807390534042015, "grad_norm": 0.1584496945142746, "learning_rate": 1.9969552574959464e-05, "loss": 0.5663, "step": 427 }, { "epoch": 0.10832700582131106, "grad_norm": 0.15288542211055756, "learning_rate": 1.9969365590023662e-05, "loss": 0.5915, "step": 428 }, { "epoch": 0.10858010630220197, "grad_norm": 0.19142642617225647, "learning_rate": 1.9969178033565278e-05, "loss": 0.5811, "step": 429 }, { "epoch": 0.10883320678309288, "grad_norm": 0.16370010375976562, "learning_rate": 1.996898990559507e-05, "loss": 0.6023, "step": 430 }, { "epoch": 0.1090863072639838, "grad_norm": 0.1556045413017273, "learning_rate": 1.9968801206123815e-05, "loss": 0.5838, "step": 431 }, { "epoch": 0.10933940774487472, "grad_norm": 0.14515145123004913, "learning_rate": 1.9968611935162328e-05, "loss": 0.5954, "step": 432 }, { "epoch": 0.10959250822576563, "grad_norm": 0.15034079551696777, "learning_rate": 1.996842209272147e-05, "loss": 0.5751, "step": 433 }, { "epoch": 0.10984560870665654, "grad_norm": 0.15695899724960327, "learning_rate": 1.9968231678812117e-05, "loss": 0.5727, "step": 434 }, { "epoch": 0.11009870918754745, "grad_norm": 0.15632854402065277, "learning_rate": 1.996804069344519e-05, "loss": 0.5893, "step": 435 }, { "epoch": 0.11035180966843837, "grad_norm": 0.1531461477279663, "learning_rate": 1.996784913663163e-05, "loss": 0.5851, "step": 436 }, { "epoch": 0.11060491014932929, "grad_norm": 0.15281325578689575, "learning_rate": 1.9967657008382425e-05, "loss": 0.5838, "step": 437 }, { "epoch": 0.1108580106302202, "grad_norm": 0.14954638481140137, "learning_rate": 1.996746430870859e-05, "loss": 0.5907, "step": 438 }, { "epoch": 0.1111111111111111, "grad_norm": 0.16380441188812256, "learning_rate": 1.9967271037621167e-05, "loss": 0.5849, "step": 439 }, { "epoch": 0.11136421159200202, "grad_norm": 0.19176936149597168, "learning_rate": 1.996707719513124e-05, "loss": 0.5831, "step": 440 }, { "epoch": 0.11161731207289294, "grad_norm": 0.1506921797990799, "learning_rate": 1.996688278124992e-05, "loss": 0.5423, "step": 441 }, { "epoch": 0.11187041255378385, "grad_norm": 0.15436936914920807, "learning_rate": 1.996668779598835e-05, "loss": 0.5848, "step": 442 }, { "epoch": 0.11212351303467477, "grad_norm": 0.15891426801681519, "learning_rate": 1.9966492239357712e-05, "loss": 0.5739, "step": 443 }, { "epoch": 0.11237661351556567, "grad_norm": 0.15040729939937592, "learning_rate": 1.9966296111369215e-05, "loss": 0.5511, "step": 444 }, { "epoch": 0.11262971399645659, "grad_norm": 0.15116922557353973, "learning_rate": 1.9966099412034104e-05, "loss": 0.5953, "step": 445 }, { "epoch": 0.1128828144773475, "grad_norm": 0.15750816464424133, "learning_rate": 1.996590214136365e-05, "loss": 0.5751, "step": 446 }, { "epoch": 0.11313591495823842, "grad_norm": 0.1517753303050995, "learning_rate": 1.996570429936917e-05, "loss": 0.6068, "step": 447 }, { "epoch": 0.11338901543912934, "grad_norm": 0.14933405816555023, "learning_rate": 1.9965505886062004e-05, "loss": 0.577, "step": 448 }, { "epoch": 0.11364211592002024, "grad_norm": 0.14812275767326355, "learning_rate": 1.996530690145352e-05, "loss": 0.5704, "step": 449 }, { "epoch": 0.11389521640091116, "grad_norm": 0.16272808611392975, "learning_rate": 1.9965107345555133e-05, "loss": 0.5688, "step": 450 }, { "epoch": 0.11414831688180208, "grad_norm": 0.14796628057956696, "learning_rate": 1.996490721837828e-05, "loss": 0.5806, "step": 451 }, { "epoch": 0.11440141736269299, "grad_norm": 0.14997173845767975, "learning_rate": 1.9964706519934432e-05, "loss": 0.5763, "step": 452 }, { "epoch": 0.11465451784358391, "grad_norm": 0.15785318613052368, "learning_rate": 1.99645052502351e-05, "loss": 0.5757, "step": 453 }, { "epoch": 0.11490761832447481, "grad_norm": 0.14751143753528595, "learning_rate": 1.996430340929182e-05, "loss": 0.5592, "step": 454 }, { "epoch": 0.11516071880536573, "grad_norm": 0.14795641601085663, "learning_rate": 1.996410099711616e-05, "loss": 0.5707, "step": 455 }, { "epoch": 0.11541381928625664, "grad_norm": 0.1508340984582901, "learning_rate": 1.9963898013719726e-05, "loss": 0.576, "step": 456 }, { "epoch": 0.11566691976714756, "grad_norm": 0.1608547866344452, "learning_rate": 1.9963694459114155e-05, "loss": 0.5707, "step": 457 }, { "epoch": 0.11592002024803848, "grad_norm": 0.15005795657634735, "learning_rate": 1.9963490333311116e-05, "loss": 0.5692, "step": 458 }, { "epoch": 0.11617312072892938, "grad_norm": 0.14821495115756989, "learning_rate": 1.9963285636322312e-05, "loss": 0.5444, "step": 459 }, { "epoch": 0.1164262212098203, "grad_norm": 0.15709495544433594, "learning_rate": 1.9963080368159476e-05, "loss": 0.5943, "step": 460 }, { "epoch": 0.11667932169071121, "grad_norm": 0.14981617033481598, "learning_rate": 1.996287452883438e-05, "loss": 0.5595, "step": 461 }, { "epoch": 0.11693242217160213, "grad_norm": 0.15191037952899933, "learning_rate": 1.9962668118358814e-05, "loss": 0.5768, "step": 462 }, { "epoch": 0.11718552265249305, "grad_norm": 0.14880812168121338, "learning_rate": 1.9962461136744624e-05, "loss": 0.5714, "step": 463 }, { "epoch": 0.11743862313338395, "grad_norm": 0.17096273601055145, "learning_rate": 1.9962253584003666e-05, "loss": 0.5672, "step": 464 }, { "epoch": 0.11769172361427487, "grad_norm": 0.15059910714626312, "learning_rate": 1.9962045460147843e-05, "loss": 0.5638, "step": 465 }, { "epoch": 0.11794482409516578, "grad_norm": 0.1539696604013443, "learning_rate": 1.9961836765189088e-05, "loss": 0.579, "step": 466 }, { "epoch": 0.1181979245760567, "grad_norm": 0.15227748453617096, "learning_rate": 1.996162749913936e-05, "loss": 0.5598, "step": 467 }, { "epoch": 0.11845102505694761, "grad_norm": 0.144993394613266, "learning_rate": 1.996141766201066e-05, "loss": 0.6033, "step": 468 }, { "epoch": 0.11870412553783852, "grad_norm": 0.15821410715579987, "learning_rate": 1.996120725381502e-05, "loss": 0.6059, "step": 469 }, { "epoch": 0.11895722601872943, "grad_norm": 0.15914399921894073, "learning_rate": 1.9960996274564493e-05, "loss": 0.5961, "step": 470 }, { "epoch": 0.11921032649962035, "grad_norm": 0.1500304937362671, "learning_rate": 1.996078472427118e-05, "loss": 0.5553, "step": 471 }, { "epoch": 0.11946342698051127, "grad_norm": 0.15498086810112, "learning_rate": 1.996057260294721e-05, "loss": 0.5705, "step": 472 }, { "epoch": 0.11971652746140218, "grad_norm": 0.14791083335876465, "learning_rate": 1.996035991060474e-05, "loss": 0.5588, "step": 473 }, { "epoch": 0.11996962794229309, "grad_norm": 0.15489189326763153, "learning_rate": 1.9960146647255962e-05, "loss": 0.58, "step": 474 }, { "epoch": 0.120222728423184, "grad_norm": 0.14758768677711487, "learning_rate": 1.9959932812913108e-05, "loss": 0.5926, "step": 475 }, { "epoch": 0.12047582890407492, "grad_norm": 0.15335911512374878, "learning_rate": 1.9959718407588436e-05, "loss": 0.5965, "step": 476 }, { "epoch": 0.12072892938496584, "grad_norm": 0.16314727067947388, "learning_rate": 1.995950343129423e-05, "loss": 0.5807, "step": 477 }, { "epoch": 0.12098202986585674, "grad_norm": 0.15755096077919006, "learning_rate": 1.995928788404282e-05, "loss": 0.5603, "step": 478 }, { "epoch": 0.12123513034674765, "grad_norm": 0.15094897150993347, "learning_rate": 1.995907176584656e-05, "loss": 0.5649, "step": 479 }, { "epoch": 0.12148823082763857, "grad_norm": 0.1507454514503479, "learning_rate": 1.9958855076717844e-05, "loss": 0.5594, "step": 480 }, { "epoch": 0.12174133130852949, "grad_norm": 0.14879706501960754, "learning_rate": 1.995863781666909e-05, "loss": 0.5627, "step": 481 }, { "epoch": 0.1219944317894204, "grad_norm": 0.14595156908035278, "learning_rate": 1.9958419985712756e-05, "loss": 0.6164, "step": 482 }, { "epoch": 0.12224753227031131, "grad_norm": 0.14950570464134216, "learning_rate": 1.995820158386133e-05, "loss": 0.5731, "step": 483 }, { "epoch": 0.12250063275120222, "grad_norm": 0.15327396988868713, "learning_rate": 1.9957982611127328e-05, "loss": 0.5676, "step": 484 }, { "epoch": 0.12275373323209314, "grad_norm": 0.14594602584838867, "learning_rate": 1.9957763067523304e-05, "loss": 0.5656, "step": 485 }, { "epoch": 0.12300683371298406, "grad_norm": 0.1484612226486206, "learning_rate": 1.9957542953061853e-05, "loss": 0.5828, "step": 486 }, { "epoch": 0.12325993419387497, "grad_norm": 0.15902866423130035, "learning_rate": 1.995732226775558e-05, "loss": 0.5838, "step": 487 }, { "epoch": 0.12351303467476588, "grad_norm": 0.15555420517921448, "learning_rate": 1.9957101011617147e-05, "loss": 0.5901, "step": 488 }, { "epoch": 0.12376613515565679, "grad_norm": 0.1557220220565796, "learning_rate": 1.9956879184659235e-05, "loss": 0.5861, "step": 489 }, { "epoch": 0.12401923563654771, "grad_norm": 0.15559321641921997, "learning_rate": 1.9956656786894558e-05, "loss": 0.5857, "step": 490 }, { "epoch": 0.12427233611743863, "grad_norm": 0.15801867842674255, "learning_rate": 1.995643381833587e-05, "loss": 0.5823, "step": 491 }, { "epoch": 0.12452543659832954, "grad_norm": 0.15406377613544464, "learning_rate": 1.995621027899595e-05, "loss": 0.5894, "step": 492 }, { "epoch": 0.12477853707922044, "grad_norm": 0.1492113322019577, "learning_rate": 1.9955986168887614e-05, "loss": 0.5648, "step": 493 }, { "epoch": 0.12503163756011137, "grad_norm": 0.17829811573028564, "learning_rate": 1.995576148802371e-05, "loss": 0.5827, "step": 494 }, { "epoch": 0.1252847380410023, "grad_norm": 0.20612825453281403, "learning_rate": 1.995553623641712e-05, "loss": 0.5863, "step": 495 }, { "epoch": 0.12553783852189318, "grad_norm": 0.15315592288970947, "learning_rate": 1.9955310414080756e-05, "loss": 0.5823, "step": 496 }, { "epoch": 0.1257909390027841, "grad_norm": 0.15542149543762207, "learning_rate": 1.9955084021027563e-05, "loss": 0.5756, "step": 497 }, { "epoch": 0.126044039483675, "grad_norm": 0.144132599234581, "learning_rate": 1.9954857057270517e-05, "loss": 0.5542, "step": 498 }, { "epoch": 0.12629713996456593, "grad_norm": 0.1674090474843979, "learning_rate": 1.9954629522822636e-05, "loss": 0.5658, "step": 499 }, { "epoch": 0.12655024044545685, "grad_norm": 0.1668354570865631, "learning_rate": 1.995440141769696e-05, "loss": 0.5959, "step": 500 }, { "epoch": 0.12680334092634776, "grad_norm": 0.15888547897338867, "learning_rate": 1.9954172741906566e-05, "loss": 0.5757, "step": 501 }, { "epoch": 0.12705644140723868, "grad_norm": 0.15670830011367798, "learning_rate": 1.9953943495464563e-05, "loss": 0.5624, "step": 502 }, { "epoch": 0.1273095418881296, "grad_norm": 0.15593712031841278, "learning_rate": 1.9953713678384097e-05, "loss": 0.5626, "step": 503 }, { "epoch": 0.1275626423690205, "grad_norm": 0.15130479633808136, "learning_rate": 1.9953483290678334e-05, "loss": 0.582, "step": 504 }, { "epoch": 0.1278157428499114, "grad_norm": 0.1545686274766922, "learning_rate": 1.9953252332360495e-05, "loss": 0.5529, "step": 505 }, { "epoch": 0.12806884333080232, "grad_norm": 0.1504255086183548, "learning_rate": 1.995302080344381e-05, "loss": 0.5847, "step": 506 }, { "epoch": 0.12832194381169323, "grad_norm": 0.15030816197395325, "learning_rate": 1.9952788703941554e-05, "loss": 0.5671, "step": 507 }, { "epoch": 0.12857504429258415, "grad_norm": 0.15274129807949066, "learning_rate": 1.9952556033867036e-05, "loss": 0.6036, "step": 508 }, { "epoch": 0.12882814477347507, "grad_norm": 0.16378158330917358, "learning_rate": 1.995232279323359e-05, "loss": 0.5824, "step": 509 }, { "epoch": 0.12908124525436598, "grad_norm": 0.14647503197193146, "learning_rate": 1.9952088982054592e-05, "loss": 0.5627, "step": 510 }, { "epoch": 0.1293343457352569, "grad_norm": 0.14909817278385162, "learning_rate": 1.9951854600343443e-05, "loss": 0.567, "step": 511 }, { "epoch": 0.12958744621614782, "grad_norm": 0.1562184989452362, "learning_rate": 1.995161964811358e-05, "loss": 0.5692, "step": 512 }, { "epoch": 0.12984054669703873, "grad_norm": 0.15351241827011108, "learning_rate": 1.995138412537847e-05, "loss": 0.6028, "step": 513 }, { "epoch": 0.13009364717792965, "grad_norm": 0.15046747028827667, "learning_rate": 1.9951148032151623e-05, "loss": 0.574, "step": 514 }, { "epoch": 0.13034674765882054, "grad_norm": 0.15114392340183258, "learning_rate": 1.995091136844656e-05, "loss": 0.5647, "step": 515 }, { "epoch": 0.13059984813971146, "grad_norm": 0.1495080143213272, "learning_rate": 1.995067413427686e-05, "loss": 0.5757, "step": 516 }, { "epoch": 0.13085294862060237, "grad_norm": 0.16302980482578278, "learning_rate": 1.9950436329656126e-05, "loss": 0.561, "step": 517 }, { "epoch": 0.1311060491014933, "grad_norm": 0.15612943470478058, "learning_rate": 1.9950197954597976e-05, "loss": 0.5664, "step": 518 }, { "epoch": 0.1313591495823842, "grad_norm": 0.15150874853134155, "learning_rate": 1.994995900911609e-05, "loss": 0.5726, "step": 519 }, { "epoch": 0.13161225006327512, "grad_norm": 0.15911947190761566, "learning_rate": 1.9949719493224156e-05, "loss": 0.5715, "step": 520 }, { "epoch": 0.13186535054416604, "grad_norm": 0.17096064984798431, "learning_rate": 1.994947940693591e-05, "loss": 0.5867, "step": 521 }, { "epoch": 0.13211845102505695, "grad_norm": 0.15780529379844666, "learning_rate": 1.9949238750265114e-05, "loss": 0.573, "step": 522 }, { "epoch": 0.13237155150594787, "grad_norm": 0.14687465131282806, "learning_rate": 1.9948997523225567e-05, "loss": 0.548, "step": 523 }, { "epoch": 0.1326246519868388, "grad_norm": 0.14603029191493988, "learning_rate": 1.9948755725831096e-05, "loss": 0.5669, "step": 524 }, { "epoch": 0.13287775246772968, "grad_norm": 0.14853985607624054, "learning_rate": 1.9948513358095565e-05, "loss": 0.6054, "step": 525 }, { "epoch": 0.1331308529486206, "grad_norm": 0.150251105427742, "learning_rate": 1.9948270420032862e-05, "loss": 0.5661, "step": 526 }, { "epoch": 0.1333839534295115, "grad_norm": 0.16034545004367828, "learning_rate": 1.994802691165692e-05, "loss": 0.5722, "step": 527 }, { "epoch": 0.13363705391040243, "grad_norm": 0.15148800611495972, "learning_rate": 1.99477828329817e-05, "loss": 0.5698, "step": 528 }, { "epoch": 0.13389015439129334, "grad_norm": 0.15985143184661865, "learning_rate": 1.994753818402119e-05, "loss": 0.5695, "step": 529 }, { "epoch": 0.13414325487218426, "grad_norm": 0.1527390331029892, "learning_rate": 1.994729296478942e-05, "loss": 0.5791, "step": 530 }, { "epoch": 0.13439635535307518, "grad_norm": 0.1592749059200287, "learning_rate": 1.994704717530044e-05, "loss": 0.6112, "step": 531 }, { "epoch": 0.1346494558339661, "grad_norm": 0.15453064441680908, "learning_rate": 1.9946800815568347e-05, "loss": 0.5678, "step": 532 }, { "epoch": 0.134902556314857, "grad_norm": 0.15128736197948456, "learning_rate": 1.9946553885607263e-05, "loss": 0.5487, "step": 533 }, { "epoch": 0.13515565679574792, "grad_norm": 0.15509158372879028, "learning_rate": 1.9946306385431344e-05, "loss": 0.5729, "step": 534 }, { "epoch": 0.1354087572766388, "grad_norm": 0.15262316167354584, "learning_rate": 1.994605831505478e-05, "loss": 0.5601, "step": 535 }, { "epoch": 0.13566185775752973, "grad_norm": 0.15524417161941528, "learning_rate": 1.994580967449179e-05, "loss": 0.5639, "step": 536 }, { "epoch": 0.13591495823842065, "grad_norm": 0.15370914340019226, "learning_rate": 1.994556046375663e-05, "loss": 0.5525, "step": 537 }, { "epoch": 0.13616805871931156, "grad_norm": 0.1504281908273697, "learning_rate": 1.994531068286358e-05, "loss": 0.6125, "step": 538 }, { "epoch": 0.13642115920020248, "grad_norm": 0.15541692078113556, "learning_rate": 1.994506033182697e-05, "loss": 0.5826, "step": 539 }, { "epoch": 0.1366742596810934, "grad_norm": 0.1468469351530075, "learning_rate": 1.9944809410661147e-05, "loss": 0.5543, "step": 540 }, { "epoch": 0.1369273601619843, "grad_norm": 0.14687389135360718, "learning_rate": 1.9944557919380492e-05, "loss": 0.5673, "step": 541 }, { "epoch": 0.13718046064287523, "grad_norm": 0.15926910936832428, "learning_rate": 1.994430585799943e-05, "loss": 0.5855, "step": 542 }, { "epoch": 0.13743356112376615, "grad_norm": 0.15011908113956451, "learning_rate": 1.9944053226532408e-05, "loss": 0.5867, "step": 543 }, { "epoch": 0.13768666160465706, "grad_norm": 0.14907962083816528, "learning_rate": 1.9943800024993906e-05, "loss": 0.5923, "step": 544 }, { "epoch": 0.13793976208554795, "grad_norm": 0.15277652442455292, "learning_rate": 1.9943546253398443e-05, "loss": 0.5812, "step": 545 }, { "epoch": 0.13819286256643887, "grad_norm": 0.14851029217243195, "learning_rate": 1.9943291911760564e-05, "loss": 0.5568, "step": 546 }, { "epoch": 0.13844596304732978, "grad_norm": 0.1487397402524948, "learning_rate": 1.9943037000094857e-05, "loss": 0.5823, "step": 547 }, { "epoch": 0.1386990635282207, "grad_norm": 0.14926819503307343, "learning_rate": 1.9942781518415927e-05, "loss": 0.5911, "step": 548 }, { "epoch": 0.13895216400911162, "grad_norm": 0.14806623756885529, "learning_rate": 1.9942525466738423e-05, "loss": 0.6017, "step": 549 }, { "epoch": 0.13920526449000253, "grad_norm": 0.14928856492042542, "learning_rate": 1.9942268845077022e-05, "loss": 0.5873, "step": 550 }, { "epoch": 0.13945836497089345, "grad_norm": 0.14869250357151031, "learning_rate": 1.9942011653446444e-05, "loss": 0.5809, "step": 551 }, { "epoch": 0.13971146545178437, "grad_norm": 0.142907053232193, "learning_rate": 1.9941753891861425e-05, "loss": 0.5781, "step": 552 }, { "epoch": 0.13996456593267528, "grad_norm": 0.14452791213989258, "learning_rate": 1.9941495560336742e-05, "loss": 0.5649, "step": 553 }, { "epoch": 0.14021766641356617, "grad_norm": 0.15031449496746063, "learning_rate": 1.9941236658887207e-05, "loss": 0.5417, "step": 554 }, { "epoch": 0.1404707668944571, "grad_norm": 0.1468997448682785, "learning_rate": 1.9940977187527665e-05, "loss": 0.5621, "step": 555 }, { "epoch": 0.140723867375348, "grad_norm": 0.15059807896614075, "learning_rate": 1.9940717146272988e-05, "loss": 0.5626, "step": 556 }, { "epoch": 0.14097696785623892, "grad_norm": 0.14762374758720398, "learning_rate": 1.994045653513808e-05, "loss": 0.5476, "step": 557 }, { "epoch": 0.14123006833712984, "grad_norm": 0.15583224594593048, "learning_rate": 1.9940195354137888e-05, "loss": 0.5522, "step": 558 }, { "epoch": 0.14148316881802075, "grad_norm": 0.1424167901277542, "learning_rate": 1.993993360328738e-05, "loss": 0.5484, "step": 559 }, { "epoch": 0.14173626929891167, "grad_norm": 0.18569441139698029, "learning_rate": 1.9939671282601564e-05, "loss": 0.5813, "step": 560 }, { "epoch": 0.1419893697798026, "grad_norm": 0.15066631138324738, "learning_rate": 1.993940839209548e-05, "loss": 0.5771, "step": 561 }, { "epoch": 0.1422424702606935, "grad_norm": 0.14673936367034912, "learning_rate": 1.9939144931784198e-05, "loss": 0.5665, "step": 562 }, { "epoch": 0.14249557074158442, "grad_norm": 0.1482633650302887, "learning_rate": 1.9938880901682817e-05, "loss": 0.5797, "step": 563 }, { "epoch": 0.1427486712224753, "grad_norm": 0.1495112031698227, "learning_rate": 1.993861630180648e-05, "loss": 0.562, "step": 564 }, { "epoch": 0.14300177170336623, "grad_norm": 0.15051144361495972, "learning_rate": 1.9938351132170348e-05, "loss": 0.5414, "step": 565 }, { "epoch": 0.14325487218425714, "grad_norm": 0.1446501761674881, "learning_rate": 1.993808539278963e-05, "loss": 0.534, "step": 566 }, { "epoch": 0.14350797266514806, "grad_norm": 0.14814580976963043, "learning_rate": 1.9937819083679557e-05, "loss": 0.5824, "step": 567 }, { "epoch": 0.14376107314603898, "grad_norm": 0.16074736416339874, "learning_rate": 1.9937552204855395e-05, "loss": 0.5501, "step": 568 }, { "epoch": 0.1440141736269299, "grad_norm": 0.1435040831565857, "learning_rate": 1.9937284756332445e-05, "loss": 0.5444, "step": 569 }, { "epoch": 0.1442672741078208, "grad_norm": 0.1509789079427719, "learning_rate": 1.993701673812604e-05, "loss": 0.5827, "step": 570 }, { "epoch": 0.14452037458871173, "grad_norm": 0.16079916059970856, "learning_rate": 1.9936748150251546e-05, "loss": 0.5837, "step": 571 }, { "epoch": 0.14477347506960264, "grad_norm": 0.15574325621128082, "learning_rate": 1.993647899272436e-05, "loss": 0.5711, "step": 572 }, { "epoch": 0.14502657555049356, "grad_norm": 0.14947296679019928, "learning_rate": 1.9936209265559908e-05, "loss": 0.5493, "step": 573 }, { "epoch": 0.14527967603138445, "grad_norm": 0.1521664559841156, "learning_rate": 1.9935938968773656e-05, "loss": 0.5855, "step": 574 }, { "epoch": 0.14553277651227536, "grad_norm": 0.14926466345787048, "learning_rate": 1.99356681023811e-05, "loss": 0.5916, "step": 575 }, { "epoch": 0.14578587699316628, "grad_norm": 0.14865681529045105, "learning_rate": 1.993539666639777e-05, "loss": 0.5578, "step": 576 }, { "epoch": 0.1460389774740572, "grad_norm": 0.15686194598674774, "learning_rate": 1.993512466083922e-05, "loss": 0.5774, "step": 577 }, { "epoch": 0.1462920779549481, "grad_norm": 0.14981764554977417, "learning_rate": 1.993485208572105e-05, "loss": 0.5951, "step": 578 }, { "epoch": 0.14654517843583903, "grad_norm": 0.15296462178230286, "learning_rate": 1.9934578941058883e-05, "loss": 0.5828, "step": 579 }, { "epoch": 0.14679827891672995, "grad_norm": 0.149521604180336, "learning_rate": 1.9934305226868383e-05, "loss": 0.593, "step": 580 }, { "epoch": 0.14705137939762086, "grad_norm": 0.14766348898410797, "learning_rate": 1.9934030943165235e-05, "loss": 0.5868, "step": 581 }, { "epoch": 0.14730447987851178, "grad_norm": 0.16118831932544708, "learning_rate": 1.9933756089965163e-05, "loss": 0.5953, "step": 582 }, { "epoch": 0.1475575803594027, "grad_norm": 0.14837884902954102, "learning_rate": 1.993348066728393e-05, "loss": 0.549, "step": 583 }, { "epoch": 0.14781068084029358, "grad_norm": 0.16255691647529602, "learning_rate": 1.993320467513732e-05, "loss": 0.5629, "step": 584 }, { "epoch": 0.1480637813211845, "grad_norm": 0.148208349943161, "learning_rate": 1.993292811354116e-05, "loss": 0.5518, "step": 585 }, { "epoch": 0.14831688180207542, "grad_norm": 0.15558750927448273, "learning_rate": 1.9932650982511296e-05, "loss": 0.5983, "step": 586 }, { "epoch": 0.14856998228296633, "grad_norm": 0.15297412872314453, "learning_rate": 1.9932373282063623e-05, "loss": 0.5745, "step": 587 }, { "epoch": 0.14882308276385725, "grad_norm": 0.1489609032869339, "learning_rate": 1.993209501221406e-05, "loss": 0.5471, "step": 588 }, { "epoch": 0.14907618324474817, "grad_norm": 0.1520264446735382, "learning_rate": 1.9931816172978556e-05, "loss": 0.5614, "step": 589 }, { "epoch": 0.14932928372563908, "grad_norm": 0.2518834173679352, "learning_rate": 1.99315367643731e-05, "loss": 0.5713, "step": 590 }, { "epoch": 0.14958238420653, "grad_norm": 0.21436260640621185, "learning_rate": 1.993125678641371e-05, "loss": 0.5649, "step": 591 }, { "epoch": 0.14983548468742092, "grad_norm": 0.14589393138885498, "learning_rate": 1.9930976239116436e-05, "loss": 0.5597, "step": 592 }, { "epoch": 0.15008858516831183, "grad_norm": 0.16369180381298065, "learning_rate": 1.9930695122497354e-05, "loss": 0.5536, "step": 593 }, { "epoch": 0.15034168564920272, "grad_norm": 0.148586705327034, "learning_rate": 1.9930413436572592e-05, "loss": 0.5754, "step": 594 }, { "epoch": 0.15059478613009364, "grad_norm": 0.14744792878627777, "learning_rate": 1.993013118135829e-05, "loss": 0.5879, "step": 595 }, { "epoch": 0.15084788661098456, "grad_norm": 0.15067927539348602, "learning_rate": 1.9929848356870632e-05, "loss": 0.5458, "step": 596 }, { "epoch": 0.15110098709187547, "grad_norm": 0.1520412117242813, "learning_rate": 1.992956496312583e-05, "loss": 0.5677, "step": 597 }, { "epoch": 0.1513540875727664, "grad_norm": 0.1419924944639206, "learning_rate": 1.9929281000140134e-05, "loss": 0.5646, "step": 598 }, { "epoch": 0.1516071880536573, "grad_norm": 0.1541939228773117, "learning_rate": 1.992899646792982e-05, "loss": 0.5952, "step": 599 }, { "epoch": 0.15186028853454822, "grad_norm": 0.16430619359016418, "learning_rate": 1.9928711366511198e-05, "loss": 0.5945, "step": 600 }, { "epoch": 0.15211338901543914, "grad_norm": 0.15097370743751526, "learning_rate": 1.9928425695900618e-05, "loss": 0.5813, "step": 601 }, { "epoch": 0.15236648949633005, "grad_norm": 0.1600654274225235, "learning_rate": 1.9928139456114452e-05, "loss": 0.5904, "step": 602 }, { "epoch": 0.15261958997722094, "grad_norm": 0.15649215877056122, "learning_rate": 1.992785264716911e-05, "loss": 0.5657, "step": 603 }, { "epoch": 0.15287269045811186, "grad_norm": 0.15534299612045288, "learning_rate": 1.9927565269081035e-05, "loss": 0.5694, "step": 604 }, { "epoch": 0.15312579093900278, "grad_norm": 0.15252354741096497, "learning_rate": 1.9927277321866704e-05, "loss": 0.5811, "step": 605 }, { "epoch": 0.1533788914198937, "grad_norm": 0.14859551191329956, "learning_rate": 1.992698880554262e-05, "loss": 0.57, "step": 606 }, { "epoch": 0.1536319919007846, "grad_norm": 0.1567496955394745, "learning_rate": 1.9926699720125325e-05, "loss": 0.5831, "step": 607 }, { "epoch": 0.15388509238167553, "grad_norm": 0.14938685297966003, "learning_rate": 1.9926410065631396e-05, "loss": 0.5716, "step": 608 }, { "epoch": 0.15413819286256644, "grad_norm": 0.15608009696006775, "learning_rate": 1.9926119842077433e-05, "loss": 0.5598, "step": 609 }, { "epoch": 0.15439129334345736, "grad_norm": 0.14903607964515686, "learning_rate": 1.9925829049480074e-05, "loss": 0.583, "step": 610 }, { "epoch": 0.15464439382434828, "grad_norm": 0.5127801299095154, "learning_rate": 1.992553768785599e-05, "loss": 0.5919, "step": 611 }, { "epoch": 0.1548974943052392, "grad_norm": 0.14690221846103668, "learning_rate": 1.9925245757221885e-05, "loss": 0.5601, "step": 612 }, { "epoch": 0.15515059478613008, "grad_norm": 0.1513320654630661, "learning_rate": 1.9924953257594494e-05, "loss": 0.5698, "step": 613 }, { "epoch": 0.155403695267021, "grad_norm": 0.15853796899318695, "learning_rate": 1.9924660188990587e-05, "loss": 0.5994, "step": 614 }, { "epoch": 0.1556567957479119, "grad_norm": 0.15242621302604675, "learning_rate": 1.9924366551426965e-05, "loss": 0.5915, "step": 615 }, { "epoch": 0.15590989622880283, "grad_norm": 0.15101855993270874, "learning_rate": 1.992407234492046e-05, "loss": 0.5847, "step": 616 }, { "epoch": 0.15616299670969375, "grad_norm": 0.15262015163898468, "learning_rate": 1.992377756948794e-05, "loss": 0.5834, "step": 617 }, { "epoch": 0.15641609719058466, "grad_norm": 0.1572626531124115, "learning_rate": 1.99234822251463e-05, "loss": 0.574, "step": 618 }, { "epoch": 0.15666919767147558, "grad_norm": 0.1495586633682251, "learning_rate": 1.9923186311912474e-05, "loss": 0.5489, "step": 619 }, { "epoch": 0.1569222981523665, "grad_norm": 0.14650045335292816, "learning_rate": 1.9922889829803428e-05, "loss": 0.5674, "step": 620 }, { "epoch": 0.1571753986332574, "grad_norm": 0.1512831598520279, "learning_rate": 1.9922592778836156e-05, "loss": 0.5705, "step": 621 }, { "epoch": 0.15742849911414833, "grad_norm": 0.1478670984506607, "learning_rate": 1.9922295159027692e-05, "loss": 0.5543, "step": 622 }, { "epoch": 0.15768159959503922, "grad_norm": 0.14747242629528046, "learning_rate": 1.992199697039509e-05, "loss": 0.5556, "step": 623 }, { "epoch": 0.15793470007593013, "grad_norm": 0.15013568103313446, "learning_rate": 1.9921698212955447e-05, "loss": 0.5663, "step": 624 }, { "epoch": 0.15818780055682105, "grad_norm": 0.16112448275089264, "learning_rate": 1.9921398886725897e-05, "loss": 0.5755, "step": 625 }, { "epoch": 0.15844090103771197, "grad_norm": 0.15023668110370636, "learning_rate": 1.9921098991723592e-05, "loss": 0.5578, "step": 626 }, { "epoch": 0.15869400151860288, "grad_norm": 0.16092228889465332, "learning_rate": 1.9920798527965724e-05, "loss": 0.5613, "step": 627 }, { "epoch": 0.1589471019994938, "grad_norm": 0.1508610099554062, "learning_rate": 1.9920497495469526e-05, "loss": 0.5534, "step": 628 }, { "epoch": 0.15920020248038472, "grad_norm": 0.151445209980011, "learning_rate": 1.9920195894252248e-05, "loss": 0.5834, "step": 629 }, { "epoch": 0.15945330296127563, "grad_norm": 0.14864689111709595, "learning_rate": 1.991989372433118e-05, "loss": 0.5356, "step": 630 }, { "epoch": 0.15970640344216655, "grad_norm": 0.15049447119235992, "learning_rate": 1.9919590985723653e-05, "loss": 0.5648, "step": 631 }, { "epoch": 0.15995950392305747, "grad_norm": 0.20305077731609344, "learning_rate": 1.991928767844701e-05, "loss": 0.5546, "step": 632 }, { "epoch": 0.16021260440394836, "grad_norm": 0.15501059591770172, "learning_rate": 1.991898380251865e-05, "loss": 0.5523, "step": 633 }, { "epoch": 0.16046570488483927, "grad_norm": 0.16190926730632782, "learning_rate": 1.9918679357955987e-05, "loss": 0.5666, "step": 634 }, { "epoch": 0.1607188053657302, "grad_norm": 0.15073847770690918, "learning_rate": 1.991837434477648e-05, "loss": 0.5442, "step": 635 }, { "epoch": 0.1609719058466211, "grad_norm": 0.15194927155971527, "learning_rate": 1.9918068762997607e-05, "loss": 0.568, "step": 636 }, { "epoch": 0.16122500632751202, "grad_norm": 0.1503583937883377, "learning_rate": 1.991776261263689e-05, "loss": 0.5732, "step": 637 }, { "epoch": 0.16147810680840294, "grad_norm": 0.16482821106910706, "learning_rate": 1.9917455893711883e-05, "loss": 0.6229, "step": 638 }, { "epoch": 0.16173120728929385, "grad_norm": 0.14933739602565765, "learning_rate": 1.9917148606240167e-05, "loss": 0.5671, "step": 639 }, { "epoch": 0.16198430777018477, "grad_norm": 0.15639838576316833, "learning_rate": 1.991684075023936e-05, "loss": 0.5761, "step": 640 }, { "epoch": 0.1622374082510757, "grad_norm": 0.1661742925643921, "learning_rate": 1.9916532325727105e-05, "loss": 0.5833, "step": 641 }, { "epoch": 0.1624905087319666, "grad_norm": 0.14919333159923553, "learning_rate": 1.9916223332721088e-05, "loss": 0.564, "step": 642 }, { "epoch": 0.1627436092128575, "grad_norm": 0.15366952121257782, "learning_rate": 1.9915913771239022e-05, "loss": 0.5893, "step": 643 }, { "epoch": 0.1629967096937484, "grad_norm": 0.15431956946849823, "learning_rate": 1.9915603641298654e-05, "loss": 0.5827, "step": 644 }, { "epoch": 0.16324981017463933, "grad_norm": 0.15347975492477417, "learning_rate": 1.991529294291776e-05, "loss": 0.5685, "step": 645 }, { "epoch": 0.16350291065553024, "grad_norm": 0.1506311148405075, "learning_rate": 1.991498167611416e-05, "loss": 0.5558, "step": 646 }, { "epoch": 0.16375601113642116, "grad_norm": 0.15514300763607025, "learning_rate": 1.991466984090569e-05, "loss": 0.5563, "step": 647 }, { "epoch": 0.16400911161731208, "grad_norm": 0.24941538274288177, "learning_rate": 1.9914357437310228e-05, "loss": 0.5965, "step": 648 }, { "epoch": 0.164262212098203, "grad_norm": 0.1533135324716568, "learning_rate": 1.9914044465345688e-05, "loss": 0.5596, "step": 649 }, { "epoch": 0.1645153125790939, "grad_norm": 0.1488742232322693, "learning_rate": 1.9913730925030005e-05, "loss": 0.589, "step": 650 }, { "epoch": 0.16476841305998483, "grad_norm": 0.1442538946866989, "learning_rate": 1.991341681638116e-05, "loss": 0.572, "step": 651 }, { "epoch": 0.16502151354087571, "grad_norm": 0.15130048990249634, "learning_rate": 1.9913102139417157e-05, "loss": 0.5927, "step": 652 }, { "epoch": 0.16527461402176663, "grad_norm": 0.15967319905757904, "learning_rate": 1.9912786894156038e-05, "loss": 0.5635, "step": 653 }, { "epoch": 0.16552771450265755, "grad_norm": 0.14939410984516144, "learning_rate": 1.9912471080615873e-05, "loss": 0.5761, "step": 654 }, { "epoch": 0.16578081498354846, "grad_norm": 0.15269893407821655, "learning_rate": 1.9912154698814765e-05, "loss": 0.5579, "step": 655 }, { "epoch": 0.16603391546443938, "grad_norm": 0.1510346531867981, "learning_rate": 1.9911837748770857e-05, "loss": 0.5591, "step": 656 }, { "epoch": 0.1662870159453303, "grad_norm": 0.14931762218475342, "learning_rate": 1.9911520230502316e-05, "loss": 0.5706, "step": 657 }, { "epoch": 0.1665401164262212, "grad_norm": 0.15608814358711243, "learning_rate": 1.9911202144027343e-05, "loss": 0.5527, "step": 658 }, { "epoch": 0.16679321690711213, "grad_norm": 0.1536053717136383, "learning_rate": 1.9910883489364178e-05, "loss": 0.5994, "step": 659 }, { "epoch": 0.16704631738800305, "grad_norm": 0.14757314324378967, "learning_rate": 1.9910564266531084e-05, "loss": 0.5724, "step": 660 }, { "epoch": 0.16729941786889396, "grad_norm": 0.1864614188671112, "learning_rate": 1.9910244475546362e-05, "loss": 0.5572, "step": 661 }, { "epoch": 0.16755251834978485, "grad_norm": 0.1520412564277649, "learning_rate": 1.990992411642835e-05, "loss": 0.5817, "step": 662 }, { "epoch": 0.16780561883067577, "grad_norm": 0.15760135650634766, "learning_rate": 1.9909603189195405e-05, "loss": 0.5514, "step": 663 }, { "epoch": 0.16805871931156668, "grad_norm": 0.14856499433517456, "learning_rate": 1.9909281693865935e-05, "loss": 0.5622, "step": 664 }, { "epoch": 0.1683118197924576, "grad_norm": 0.14778557419776917, "learning_rate": 1.9908959630458362e-05, "loss": 0.5571, "step": 665 }, { "epoch": 0.16856492027334852, "grad_norm": 0.1456754058599472, "learning_rate": 1.9908636998991156e-05, "loss": 0.5677, "step": 666 }, { "epoch": 0.16881802075423943, "grad_norm": 0.1472209244966507, "learning_rate": 1.9908313799482807e-05, "loss": 0.5751, "step": 667 }, { "epoch": 0.16907112123513035, "grad_norm": 0.15274663269519806, "learning_rate": 1.9907990031951847e-05, "loss": 0.5367, "step": 668 }, { "epoch": 0.16932422171602127, "grad_norm": 0.14899861812591553, "learning_rate": 1.9907665696416835e-05, "loss": 0.5721, "step": 669 }, { "epoch": 0.16957732219691218, "grad_norm": 0.1461005061864853, "learning_rate": 1.9907340792896362e-05, "loss": 0.5664, "step": 670 }, { "epoch": 0.1698304226778031, "grad_norm": 0.1556117981672287, "learning_rate": 1.9907015321409063e-05, "loss": 0.5747, "step": 671 }, { "epoch": 0.170083523158694, "grad_norm": 0.15157921612262726, "learning_rate": 1.9906689281973585e-05, "loss": 0.5702, "step": 672 }, { "epoch": 0.1703366236395849, "grad_norm": 0.16483035683631897, "learning_rate": 1.990636267460863e-05, "loss": 0.594, "step": 673 }, { "epoch": 0.17058972412047582, "grad_norm": 0.1515565663576126, "learning_rate": 1.9906035499332917e-05, "loss": 0.5831, "step": 674 }, { "epoch": 0.17084282460136674, "grad_norm": 0.1511337012052536, "learning_rate": 1.9905707756165198e-05, "loss": 0.5637, "step": 675 }, { "epoch": 0.17109592508225765, "grad_norm": 0.15434476733207703, "learning_rate": 1.9905379445124267e-05, "loss": 0.5996, "step": 676 }, { "epoch": 0.17134902556314857, "grad_norm": 0.14820517599582672, "learning_rate": 1.9905050566228945e-05, "loss": 0.5669, "step": 677 }, { "epoch": 0.1716021260440395, "grad_norm": 0.15169605612754822, "learning_rate": 1.9904721119498084e-05, "loss": 0.5638, "step": 678 }, { "epoch": 0.1718552265249304, "grad_norm": 0.1516595184803009, "learning_rate": 1.990439110495057e-05, "loss": 0.5726, "step": 679 }, { "epoch": 0.17210832700582132, "grad_norm": 0.15837806463241577, "learning_rate": 1.9904060522605327e-05, "loss": 0.5851, "step": 680 }, { "epoch": 0.17236142748671224, "grad_norm": 0.15037870407104492, "learning_rate": 1.9903729372481302e-05, "loss": 0.5679, "step": 681 }, { "epoch": 0.17261452796760313, "grad_norm": 0.1476915329694748, "learning_rate": 1.9903397654597482e-05, "loss": 0.5509, "step": 682 }, { "epoch": 0.17286762844849404, "grad_norm": 0.15054358541965485, "learning_rate": 1.990306536897288e-05, "loss": 0.5645, "step": 683 }, { "epoch": 0.17312072892938496, "grad_norm": 0.1502549946308136, "learning_rate": 1.9902732515626546e-05, "loss": 0.6015, "step": 684 }, { "epoch": 0.17337382941027588, "grad_norm": 0.1518615037202835, "learning_rate": 1.9902399094577566e-05, "loss": 0.5778, "step": 685 }, { "epoch": 0.1736269298911668, "grad_norm": 0.1569449007511139, "learning_rate": 1.990206510584505e-05, "loss": 0.5819, "step": 686 }, { "epoch": 0.1738800303720577, "grad_norm": 0.1500752717256546, "learning_rate": 1.9901730549448147e-05, "loss": 0.5498, "step": 687 }, { "epoch": 0.17413313085294863, "grad_norm": 0.15543176233768463, "learning_rate": 1.9901395425406033e-05, "loss": 0.5501, "step": 688 }, { "epoch": 0.17438623133383954, "grad_norm": 0.15402987599372864, "learning_rate": 1.9901059733737923e-05, "loss": 0.5868, "step": 689 }, { "epoch": 0.17463933181473046, "grad_norm": 0.15203896164894104, "learning_rate": 1.9900723474463063e-05, "loss": 0.5802, "step": 690 }, { "epoch": 0.17489243229562138, "grad_norm": 0.14999938011169434, "learning_rate": 1.9900386647600726e-05, "loss": 0.5744, "step": 691 }, { "epoch": 0.17514553277651226, "grad_norm": 0.14887036383152008, "learning_rate": 1.9900049253170226e-05, "loss": 0.565, "step": 692 }, { "epoch": 0.17539863325740318, "grad_norm": 0.15507274866104126, "learning_rate": 1.9899711291190898e-05, "loss": 0.5731, "step": 693 }, { "epoch": 0.1756517337382941, "grad_norm": 0.14773182570934296, "learning_rate": 1.9899372761682128e-05, "loss": 0.5971, "step": 694 }, { "epoch": 0.175904834219185, "grad_norm": 0.17984947562217712, "learning_rate": 1.9899033664663312e-05, "loss": 0.5553, "step": 695 }, { "epoch": 0.17615793470007593, "grad_norm": 0.14991788566112518, "learning_rate": 1.9898694000153896e-05, "loss": 0.5433, "step": 696 }, { "epoch": 0.17641103518096685, "grad_norm": 0.1493489146232605, "learning_rate": 1.989835376817335e-05, "loss": 0.554, "step": 697 }, { "epoch": 0.17666413566185776, "grad_norm": 0.1503385454416275, "learning_rate": 1.9898012968741178e-05, "loss": 0.5822, "step": 698 }, { "epoch": 0.17691723614274868, "grad_norm": 0.1620415300130844, "learning_rate": 1.989767160187692e-05, "loss": 0.5697, "step": 699 }, { "epoch": 0.1771703366236396, "grad_norm": 0.15772275626659393, "learning_rate": 1.9897329667600143e-05, "loss": 0.5615, "step": 700 }, { "epoch": 0.17742343710453048, "grad_norm": 0.15086622536182404, "learning_rate": 1.9896987165930455e-05, "loss": 0.5746, "step": 701 }, { "epoch": 0.1776765375854214, "grad_norm": 0.14750191569328308, "learning_rate": 1.9896644096887483e-05, "loss": 0.591, "step": 702 }, { "epoch": 0.17792963806631232, "grad_norm": 0.18089640140533447, "learning_rate": 1.98963004604909e-05, "loss": 0.5773, "step": 703 }, { "epoch": 0.17818273854720323, "grad_norm": 0.15691858530044556, "learning_rate": 1.9895956256760403e-05, "loss": 0.6003, "step": 704 }, { "epoch": 0.17843583902809415, "grad_norm": 0.15080849826335907, "learning_rate": 1.9895611485715726e-05, "loss": 0.5561, "step": 705 }, { "epoch": 0.17868893950898507, "grad_norm": 0.1499396711587906, "learning_rate": 1.9895266147376634e-05, "loss": 0.5699, "step": 706 }, { "epoch": 0.17894203998987598, "grad_norm": 0.14726616442203522, "learning_rate": 1.989492024176292e-05, "loss": 0.5649, "step": 707 }, { "epoch": 0.1791951404707669, "grad_norm": 0.14629290997982025, "learning_rate": 1.9894573768894423e-05, "loss": 0.5599, "step": 708 }, { "epoch": 0.17944824095165782, "grad_norm": 0.14857056736946106, "learning_rate": 1.9894226728790998e-05, "loss": 0.5825, "step": 709 }, { "epoch": 0.17970134143254873, "grad_norm": 0.1579265296459198, "learning_rate": 1.9893879121472546e-05, "loss": 0.5486, "step": 710 }, { "epoch": 0.17995444191343962, "grad_norm": 0.1460217982530594, "learning_rate": 1.9893530946958987e-05, "loss": 0.5746, "step": 711 }, { "epoch": 0.18020754239433054, "grad_norm": 0.1500665843486786, "learning_rate": 1.989318220527029e-05, "loss": 0.5593, "step": 712 }, { "epoch": 0.18046064287522146, "grad_norm": 0.16921649873256683, "learning_rate": 1.9892832896426438e-05, "loss": 0.5542, "step": 713 }, { "epoch": 0.18071374335611237, "grad_norm": 0.14597009122371674, "learning_rate": 1.9892483020447463e-05, "loss": 0.5825, "step": 714 }, { "epoch": 0.1809668438370033, "grad_norm": 0.15014439821243286, "learning_rate": 1.989213257735342e-05, "loss": 0.5663, "step": 715 }, { "epoch": 0.1812199443178942, "grad_norm": 0.15399925410747528, "learning_rate": 1.9891781567164404e-05, "loss": 0.5428, "step": 716 }, { "epoch": 0.18147304479878512, "grad_norm": 0.1528647243976593, "learning_rate": 1.9891429989900527e-05, "loss": 0.5704, "step": 717 }, { "epoch": 0.18172614527967604, "grad_norm": 0.15238700807094574, "learning_rate": 1.9891077845581957e-05, "loss": 0.5716, "step": 718 }, { "epoch": 0.18197924576056695, "grad_norm": 0.15280528366565704, "learning_rate": 1.989072513422887e-05, "loss": 0.607, "step": 719 }, { "epoch": 0.18223234624145787, "grad_norm": 0.14587153494358063, "learning_rate": 1.98903718558615e-05, "loss": 0.5656, "step": 720 }, { "epoch": 0.18248544672234876, "grad_norm": 0.1593136191368103, "learning_rate": 1.989001801050008e-05, "loss": 0.5719, "step": 721 }, { "epoch": 0.18273854720323968, "grad_norm": 0.14155927300453186, "learning_rate": 1.9889663598164915e-05, "loss": 0.595, "step": 722 }, { "epoch": 0.1829916476841306, "grad_norm": 0.15472820401191711, "learning_rate": 1.9889308618876317e-05, "loss": 0.5775, "step": 723 }, { "epoch": 0.1832447481650215, "grad_norm": 0.15889547765254974, "learning_rate": 1.9888953072654624e-05, "loss": 0.5711, "step": 724 }, { "epoch": 0.18349784864591243, "grad_norm": 0.14979174733161926, "learning_rate": 1.9888596959520234e-05, "loss": 0.5955, "step": 725 }, { "epoch": 0.18375094912680334, "grad_norm": 0.17373642325401306, "learning_rate": 1.9888240279493557e-05, "loss": 0.5716, "step": 726 }, { "epoch": 0.18400404960769426, "grad_norm": 0.14577554166316986, "learning_rate": 1.9887883032595037e-05, "loss": 0.5856, "step": 727 }, { "epoch": 0.18425715008858518, "grad_norm": 0.14496983587741852, "learning_rate": 1.988752521884516e-05, "loss": 0.5905, "step": 728 }, { "epoch": 0.1845102505694761, "grad_norm": 0.15008766949176788, "learning_rate": 1.9887166838264434e-05, "loss": 0.5736, "step": 729 }, { "epoch": 0.184763351050367, "grad_norm": 0.14624342322349548, "learning_rate": 1.9886807890873404e-05, "loss": 0.5416, "step": 730 }, { "epoch": 0.1850164515312579, "grad_norm": 0.1502377986907959, "learning_rate": 1.9886448376692656e-05, "loss": 0.5535, "step": 731 }, { "epoch": 0.1852695520121488, "grad_norm": 0.14800770580768585, "learning_rate": 1.988608829574279e-05, "loss": 0.5827, "step": 732 }, { "epoch": 0.18552265249303973, "grad_norm": 0.1491914838552475, "learning_rate": 1.9885727648044453e-05, "loss": 0.5701, "step": 733 }, { "epoch": 0.18577575297393065, "grad_norm": 0.15261493623256683, "learning_rate": 1.9885366433618322e-05, "loss": 0.5806, "step": 734 }, { "epoch": 0.18602885345482156, "grad_norm": 0.14738062024116516, "learning_rate": 1.9885004652485103e-05, "loss": 0.5835, "step": 735 }, { "epoch": 0.18628195393571248, "grad_norm": 0.15116408467292786, "learning_rate": 1.988464230466553e-05, "loss": 0.5753, "step": 736 }, { "epoch": 0.1865350544166034, "grad_norm": 0.1526593565940857, "learning_rate": 1.988427939018039e-05, "loss": 0.5727, "step": 737 }, { "epoch": 0.1867881548974943, "grad_norm": 0.14892533421516418, "learning_rate": 1.9883915909050472e-05, "loss": 0.5694, "step": 738 }, { "epoch": 0.18704125537838523, "grad_norm": 0.14990945160388947, "learning_rate": 1.9883551861296626e-05, "loss": 0.5663, "step": 739 }, { "epoch": 0.18729435585927615, "grad_norm": 0.14417888224124908, "learning_rate": 1.9883187246939717e-05, "loss": 0.5517, "step": 740 }, { "epoch": 0.18754745634016703, "grad_norm": 0.14893411099910736, "learning_rate": 1.9882822066000644e-05, "loss": 0.5656, "step": 741 }, { "epoch": 0.18780055682105795, "grad_norm": 0.15740130841732025, "learning_rate": 1.9882456318500347e-05, "loss": 0.5595, "step": 742 }, { "epoch": 0.18805365730194887, "grad_norm": 0.15170875191688538, "learning_rate": 1.9882090004459794e-05, "loss": 0.5772, "step": 743 }, { "epoch": 0.18830675778283978, "grad_norm": 0.1540304720401764, "learning_rate": 1.9881723123899984e-05, "loss": 0.582, "step": 744 }, { "epoch": 0.1885598582637307, "grad_norm": 0.15017232298851013, "learning_rate": 1.9881355676841947e-05, "loss": 0.5557, "step": 745 }, { "epoch": 0.18881295874462162, "grad_norm": 0.1520720273256302, "learning_rate": 1.988098766330675e-05, "loss": 0.5682, "step": 746 }, { "epoch": 0.18906605922551253, "grad_norm": 0.14698469638824463, "learning_rate": 1.9880619083315495e-05, "loss": 0.5703, "step": 747 }, { "epoch": 0.18931915970640345, "grad_norm": 0.16416220366954803, "learning_rate": 1.98802499368893e-05, "loss": 0.5467, "step": 748 }, { "epoch": 0.18957226018729437, "grad_norm": 0.14882448315620422, "learning_rate": 1.9879880224049337e-05, "loss": 0.5549, "step": 749 }, { "epoch": 0.18982536066818528, "grad_norm": 0.15382224321365356, "learning_rate": 1.98795099448168e-05, "loss": 0.5716, "step": 750 }, { "epoch": 0.19007846114907617, "grad_norm": 0.14566577970981598, "learning_rate": 1.9879139099212912e-05, "loss": 0.5848, "step": 751 }, { "epoch": 0.1903315616299671, "grad_norm": 0.1462111920118332, "learning_rate": 1.987876768725894e-05, "loss": 0.5762, "step": 752 }, { "epoch": 0.190584662110858, "grad_norm": 0.14414694905281067, "learning_rate": 1.9878395708976164e-05, "loss": 0.5293, "step": 753 }, { "epoch": 0.19083776259174892, "grad_norm": 0.2745702564716339, "learning_rate": 1.987802316438592e-05, "loss": 0.5625, "step": 754 }, { "epoch": 0.19109086307263984, "grad_norm": 0.14843228459358215, "learning_rate": 1.9877650053509566e-05, "loss": 0.5498, "step": 755 }, { "epoch": 0.19134396355353075, "grad_norm": 0.1457221359014511, "learning_rate": 1.9877276376368483e-05, "loss": 0.5439, "step": 756 }, { "epoch": 0.19159706403442167, "grad_norm": 0.15008778870105743, "learning_rate": 1.98769021329841e-05, "loss": 0.5675, "step": 757 }, { "epoch": 0.1918501645153126, "grad_norm": 0.1420939564704895, "learning_rate": 1.987652732337787e-05, "loss": 0.5727, "step": 758 }, { "epoch": 0.1921032649962035, "grad_norm": 0.1531420350074768, "learning_rate": 1.9876151947571273e-05, "loss": 0.5537, "step": 759 }, { "epoch": 0.1923563654770944, "grad_norm": 0.1537618339061737, "learning_rate": 1.9875776005585838e-05, "loss": 0.5251, "step": 760 }, { "epoch": 0.1926094659579853, "grad_norm": 0.1491342931985855, "learning_rate": 1.9875399497443114e-05, "loss": 0.5642, "step": 761 }, { "epoch": 0.19286256643887623, "grad_norm": 0.14750774204730988, "learning_rate": 1.9875022423164686e-05, "loss": 0.5551, "step": 762 }, { "epoch": 0.19311566691976714, "grad_norm": 0.14625151455402374, "learning_rate": 1.9874644782772167e-05, "loss": 0.5609, "step": 763 }, { "epoch": 0.19336876740065806, "grad_norm": 0.15412314236164093, "learning_rate": 1.9874266576287215e-05, "loss": 0.5612, "step": 764 }, { "epoch": 0.19362186788154898, "grad_norm": 0.1581011265516281, "learning_rate": 1.98738878037315e-05, "loss": 0.5554, "step": 765 }, { "epoch": 0.1938749683624399, "grad_norm": 0.14776253700256348, "learning_rate": 1.9873508465126744e-05, "loss": 0.5755, "step": 766 }, { "epoch": 0.1941280688433308, "grad_norm": 0.15599747002124786, "learning_rate": 1.987312856049469e-05, "loss": 0.5744, "step": 767 }, { "epoch": 0.19438116932422173, "grad_norm": 0.15313883125782013, "learning_rate": 1.9872748089857123e-05, "loss": 0.5595, "step": 768 }, { "epoch": 0.19463426980511264, "grad_norm": 0.1496153026819229, "learning_rate": 1.9872367053235847e-05, "loss": 0.5607, "step": 769 }, { "epoch": 0.19488737028600353, "grad_norm": 0.15209335088729858, "learning_rate": 1.987198545065271e-05, "loss": 0.5883, "step": 770 }, { "epoch": 0.19514047076689445, "grad_norm": 0.14694365859031677, "learning_rate": 1.9871603282129588e-05, "loss": 0.5692, "step": 771 }, { "epoch": 0.19539357124778536, "grad_norm": 0.1486019343137741, "learning_rate": 1.9871220547688392e-05, "loss": 0.5753, "step": 772 }, { "epoch": 0.19564667172867628, "grad_norm": 0.15324336290359497, "learning_rate": 1.987083724735106e-05, "loss": 0.5873, "step": 773 }, { "epoch": 0.1958997722095672, "grad_norm": 0.14359359443187714, "learning_rate": 1.9870453381139565e-05, "loss": 0.5718, "step": 774 }, { "epoch": 0.1961528726904581, "grad_norm": 0.1478722244501114, "learning_rate": 1.987006894907592e-05, "loss": 0.5813, "step": 775 }, { "epoch": 0.19640597317134903, "grad_norm": 0.1535591036081314, "learning_rate": 1.9869683951182154e-05, "loss": 0.5818, "step": 776 }, { "epoch": 0.19665907365223995, "grad_norm": 0.1460777223110199, "learning_rate": 1.9869298387480345e-05, "loss": 0.5696, "step": 777 }, { "epoch": 0.19691217413313086, "grad_norm": 0.15877650678157806, "learning_rate": 1.9868912257992593e-05, "loss": 0.5593, "step": 778 }, { "epoch": 0.19716527461402178, "grad_norm": 0.14716073870658875, "learning_rate": 1.986852556274104e-05, "loss": 0.5495, "step": 779 }, { "epoch": 0.19741837509491267, "grad_norm": 0.14650224149227142, "learning_rate": 1.9868138301747845e-05, "loss": 0.5565, "step": 780 }, { "epoch": 0.19767147557580358, "grad_norm": 0.15673044323921204, "learning_rate": 1.9867750475035216e-05, "loss": 0.5877, "step": 781 }, { "epoch": 0.1979245760566945, "grad_norm": 0.15667545795440674, "learning_rate": 1.9867362082625386e-05, "loss": 0.5733, "step": 782 }, { "epoch": 0.19817767653758542, "grad_norm": 0.18397942185401917, "learning_rate": 1.9866973124540617e-05, "loss": 0.5653, "step": 783 }, { "epoch": 0.19843077701847633, "grad_norm": 0.15077096223831177, "learning_rate": 1.9866583600803208e-05, "loss": 0.55, "step": 784 }, { "epoch": 0.19868387749936725, "grad_norm": 0.14992989599704742, "learning_rate": 1.9866193511435492e-05, "loss": 0.5574, "step": 785 }, { "epoch": 0.19893697798025817, "grad_norm": 0.14983786642551422, "learning_rate": 1.9865802856459832e-05, "loss": 0.5891, "step": 786 }, { "epoch": 0.19919007846114908, "grad_norm": 0.15408428013324738, "learning_rate": 1.9865411635898623e-05, "loss": 0.5957, "step": 787 }, { "epoch": 0.19944317894204, "grad_norm": 0.15763822197914124, "learning_rate": 1.9865019849774287e-05, "loss": 0.5684, "step": 788 }, { "epoch": 0.19969627942293092, "grad_norm": 0.14608003199100494, "learning_rate": 1.9864627498109292e-05, "loss": 0.5338, "step": 789 }, { "epoch": 0.1999493799038218, "grad_norm": 0.152947336435318, "learning_rate": 1.9864234580926127e-05, "loss": 0.573, "step": 790 }, { "epoch": 0.20020248038471272, "grad_norm": 0.15223805606365204, "learning_rate": 1.9863841098247318e-05, "loss": 0.5662, "step": 791 }, { "epoch": 0.20045558086560364, "grad_norm": 0.181410551071167, "learning_rate": 1.9863447050095425e-05, "loss": 0.5738, "step": 792 }, { "epoch": 0.20070868134649456, "grad_norm": 0.14844632148742676, "learning_rate": 1.986305243649303e-05, "loss": 0.571, "step": 793 }, { "epoch": 0.20096178182738547, "grad_norm": 0.15224933624267578, "learning_rate": 1.9862657257462764e-05, "loss": 0.575, "step": 794 }, { "epoch": 0.2012148823082764, "grad_norm": 0.14983992278575897, "learning_rate": 1.9862261513027278e-05, "loss": 0.5459, "step": 795 }, { "epoch": 0.2014679827891673, "grad_norm": 0.15010716021060944, "learning_rate": 1.986186520320926e-05, "loss": 0.5684, "step": 796 }, { "epoch": 0.20172108327005822, "grad_norm": 0.15364646911621094, "learning_rate": 1.9861468328031427e-05, "loss": 0.5923, "step": 797 }, { "epoch": 0.20197418375094914, "grad_norm": 0.24683018028736115, "learning_rate": 1.9861070887516538e-05, "loss": 0.5493, "step": 798 }, { "epoch": 0.20222728423184005, "grad_norm": 0.15156035125255585, "learning_rate": 1.986067288168737e-05, "loss": 0.5553, "step": 799 }, { "epoch": 0.20248038471273094, "grad_norm": 0.1583360731601715, "learning_rate": 1.9860274310566743e-05, "loss": 0.5715, "step": 800 }, { "epoch": 0.20273348519362186, "grad_norm": 0.14934858679771423, "learning_rate": 1.9859875174177507e-05, "loss": 0.5931, "step": 801 }, { "epoch": 0.20298658567451278, "grad_norm": 0.16253922879695892, "learning_rate": 1.985947547254254e-05, "loss": 0.6016, "step": 802 }, { "epoch": 0.2032396861554037, "grad_norm": 0.14571331441402435, "learning_rate": 1.9859075205684763e-05, "loss": 0.5308, "step": 803 }, { "epoch": 0.2034927866362946, "grad_norm": 0.1456516981124878, "learning_rate": 1.9858674373627113e-05, "loss": 0.581, "step": 804 }, { "epoch": 0.20374588711718553, "grad_norm": 0.15310640633106232, "learning_rate": 1.9858272976392574e-05, "loss": 0.5679, "step": 805 }, { "epoch": 0.20399898759807644, "grad_norm": 0.16352048516273499, "learning_rate": 1.985787101400416e-05, "loss": 0.5891, "step": 806 }, { "epoch": 0.20425208807896736, "grad_norm": 0.1480332612991333, "learning_rate": 1.985746848648491e-05, "loss": 0.5655, "step": 807 }, { "epoch": 0.20450518855985828, "grad_norm": 0.1917625367641449, "learning_rate": 1.98570653938579e-05, "loss": 0.5484, "step": 808 }, { "epoch": 0.20475828904074916, "grad_norm": 0.150888592004776, "learning_rate": 1.9856661736146244e-05, "loss": 0.5603, "step": 809 }, { "epoch": 0.20501138952164008, "grad_norm": 0.1545407623052597, "learning_rate": 1.9856257513373077e-05, "loss": 0.5379, "step": 810 }, { "epoch": 0.205264490002531, "grad_norm": 0.1561100035905838, "learning_rate": 1.9855852725561575e-05, "loss": 0.5782, "step": 811 }, { "epoch": 0.2055175904834219, "grad_norm": 0.14499087631702423, "learning_rate": 1.9855447372734943e-05, "loss": 0.5654, "step": 812 }, { "epoch": 0.20577069096431283, "grad_norm": 0.16896207630634308, "learning_rate": 1.9855041454916416e-05, "loss": 0.5453, "step": 813 }, { "epoch": 0.20602379144520375, "grad_norm": 0.15073281526565552, "learning_rate": 1.9854634972129272e-05, "loss": 0.5577, "step": 814 }, { "epoch": 0.20627689192609466, "grad_norm": 0.17091280221939087, "learning_rate": 1.9854227924396804e-05, "loss": 0.5716, "step": 815 }, { "epoch": 0.20652999240698558, "grad_norm": 0.14324995875358582, "learning_rate": 1.985382031174236e-05, "loss": 0.5561, "step": 816 }, { "epoch": 0.2067830928878765, "grad_norm": 0.15467405319213867, "learning_rate": 1.9853412134189292e-05, "loss": 0.5706, "step": 817 }, { "epoch": 0.2070361933687674, "grad_norm": 0.14770232141017914, "learning_rate": 1.985300339176101e-05, "loss": 0.5569, "step": 818 }, { "epoch": 0.2072892938496583, "grad_norm": 0.1511012762784958, "learning_rate": 1.9852594084480946e-05, "loss": 0.5587, "step": 819 }, { "epoch": 0.20754239433054922, "grad_norm": 0.1511695683002472, "learning_rate": 1.985218421237256e-05, "loss": 0.57, "step": 820 }, { "epoch": 0.20779549481144013, "grad_norm": 0.1476137489080429, "learning_rate": 1.9851773775459356e-05, "loss": 0.5687, "step": 821 }, { "epoch": 0.20804859529233105, "grad_norm": 0.14952382445335388, "learning_rate": 1.985136277376486e-05, "loss": 0.5763, "step": 822 }, { "epoch": 0.20830169577322197, "grad_norm": 0.15390071272850037, "learning_rate": 1.9850951207312628e-05, "loss": 0.5937, "step": 823 }, { "epoch": 0.20855479625411288, "grad_norm": 0.14283983409404755, "learning_rate": 1.9850539076126262e-05, "loss": 0.5638, "step": 824 }, { "epoch": 0.2088078967350038, "grad_norm": 0.1518326848745346, "learning_rate": 1.9850126380229386e-05, "loss": 0.5858, "step": 825 }, { "epoch": 0.20906099721589472, "grad_norm": 0.15040968358516693, "learning_rate": 1.984971311964566e-05, "loss": 0.5581, "step": 826 }, { "epoch": 0.20931409769678563, "grad_norm": 0.15228532254695892, "learning_rate": 1.9849299294398773e-05, "loss": 0.5566, "step": 827 }, { "epoch": 0.20956719817767655, "grad_norm": 0.15812933444976807, "learning_rate": 1.9848884904512453e-05, "loss": 0.5818, "step": 828 }, { "epoch": 0.20982029865856744, "grad_norm": 0.14517231285572052, "learning_rate": 1.984846995001045e-05, "loss": 0.5614, "step": 829 }, { "epoch": 0.21007339913945836, "grad_norm": 0.14441753923892975, "learning_rate": 1.9848054430916558e-05, "loss": 0.5678, "step": 830 }, { "epoch": 0.21032649962034927, "grad_norm": 0.14744623005390167, "learning_rate": 1.9847638347254594e-05, "loss": 0.5865, "step": 831 }, { "epoch": 0.2105796001012402, "grad_norm": 0.1468220204114914, "learning_rate": 1.9847221699048417e-05, "loss": 0.5514, "step": 832 }, { "epoch": 0.2108327005821311, "grad_norm": 0.14899007976055145, "learning_rate": 1.9846804486321902e-05, "loss": 0.5589, "step": 833 }, { "epoch": 0.21108580106302202, "grad_norm": 0.15209320187568665, "learning_rate": 1.9846386709098977e-05, "loss": 0.5523, "step": 834 }, { "epoch": 0.21133890154391294, "grad_norm": 0.14415016770362854, "learning_rate": 1.984596836740359e-05, "loss": 0.5686, "step": 835 }, { "epoch": 0.21159200202480385, "grad_norm": 0.14713110029697418, "learning_rate": 1.9845549461259715e-05, "loss": 0.5911, "step": 836 }, { "epoch": 0.21184510250569477, "grad_norm": 0.15125249326229095, "learning_rate": 1.9845129990691382e-05, "loss": 0.5706, "step": 837 }, { "epoch": 0.2120982029865857, "grad_norm": 0.14549441635608673, "learning_rate": 1.9844709955722627e-05, "loss": 0.5641, "step": 838 }, { "epoch": 0.21235130346747658, "grad_norm": 0.15025334060192108, "learning_rate": 1.9844289356377534e-05, "loss": 0.5769, "step": 839 }, { "epoch": 0.2126044039483675, "grad_norm": 0.14516359567642212, "learning_rate": 1.9843868192680213e-05, "loss": 0.5508, "step": 840 }, { "epoch": 0.2128575044292584, "grad_norm": 0.15259525179862976, "learning_rate": 1.9843446464654814e-05, "loss": 0.5719, "step": 841 }, { "epoch": 0.21311060491014933, "grad_norm": 0.1537070870399475, "learning_rate": 1.9843024172325504e-05, "loss": 0.5914, "step": 842 }, { "epoch": 0.21336370539104024, "grad_norm": 0.15067122876644135, "learning_rate": 1.98426013157165e-05, "loss": 0.5737, "step": 843 }, { "epoch": 0.21361680587193116, "grad_norm": 0.15000620484352112, "learning_rate": 1.984217789485204e-05, "loss": 0.59, "step": 844 }, { "epoch": 0.21386990635282208, "grad_norm": 0.1379261314868927, "learning_rate": 1.9841753909756406e-05, "loss": 0.5331, "step": 845 }, { "epoch": 0.214123006833713, "grad_norm": 0.3226446509361267, "learning_rate": 1.984132936045389e-05, "loss": 0.5666, "step": 846 }, { "epoch": 0.2143761073146039, "grad_norm": 0.1511106640100479, "learning_rate": 1.9840904246968837e-05, "loss": 0.5392, "step": 847 }, { "epoch": 0.21462920779549483, "grad_norm": 0.1527481973171234, "learning_rate": 1.9840478569325624e-05, "loss": 0.5539, "step": 848 }, { "epoch": 0.21488230827638571, "grad_norm": 0.15316209197044373, "learning_rate": 1.9840052327548642e-05, "loss": 0.5653, "step": 849 }, { "epoch": 0.21513540875727663, "grad_norm": 0.14784926176071167, "learning_rate": 1.9839625521662338e-05, "loss": 0.5627, "step": 850 }, { "epoch": 0.21538850923816755, "grad_norm": 0.14797787368297577, "learning_rate": 1.9839198151691172e-05, "loss": 0.5637, "step": 851 }, { "epoch": 0.21564160971905846, "grad_norm": 0.1469213217496872, "learning_rate": 1.983877021765965e-05, "loss": 0.5445, "step": 852 }, { "epoch": 0.21589471019994938, "grad_norm": 0.14689572155475616, "learning_rate": 1.98383417195923e-05, "loss": 0.5447, "step": 853 }, { "epoch": 0.2161478106808403, "grad_norm": 0.1469879001379013, "learning_rate": 1.983791265751369e-05, "loss": 0.5541, "step": 854 }, { "epoch": 0.2164009111617312, "grad_norm": 0.1490246057510376, "learning_rate": 1.9837483031448414e-05, "loss": 0.5866, "step": 855 }, { "epoch": 0.21665401164262213, "grad_norm": 0.15119101107120514, "learning_rate": 1.9837052841421106e-05, "loss": 0.564, "step": 856 }, { "epoch": 0.21690711212351305, "grad_norm": 0.15075601637363434, "learning_rate": 1.9836622087456422e-05, "loss": 0.5776, "step": 857 }, { "epoch": 0.21716021260440393, "grad_norm": 0.20193101465702057, "learning_rate": 1.9836190769579063e-05, "loss": 0.5576, "step": 858 }, { "epoch": 0.21741331308529485, "grad_norm": 0.16276125609874725, "learning_rate": 1.983575888781375e-05, "loss": 0.5525, "step": 859 }, { "epoch": 0.21766641356618577, "grad_norm": 0.14671862125396729, "learning_rate": 1.9835326442185247e-05, "loss": 0.5494, "step": 860 }, { "epoch": 0.21791951404707668, "grad_norm": 0.14973184466362, "learning_rate": 1.9834893432718338e-05, "loss": 0.563, "step": 861 }, { "epoch": 0.2181726145279676, "grad_norm": 0.14972767233848572, "learning_rate": 1.9834459859437856e-05, "loss": 0.5496, "step": 862 }, { "epoch": 0.21842571500885852, "grad_norm": 0.14641425013542175, "learning_rate": 1.9834025722368646e-05, "loss": 0.5531, "step": 863 }, { "epoch": 0.21867881548974943, "grad_norm": 0.14714324474334717, "learning_rate": 1.9833591021535604e-05, "loss": 0.5565, "step": 864 }, { "epoch": 0.21893191597064035, "grad_norm": 0.16813084483146667, "learning_rate": 1.983315575696365e-05, "loss": 0.5525, "step": 865 }, { "epoch": 0.21918501645153127, "grad_norm": 0.1511027067899704, "learning_rate": 1.9832719928677734e-05, "loss": 0.5575, "step": 866 }, { "epoch": 0.21943811693242218, "grad_norm": 0.14805647730827332, "learning_rate": 1.983228353670284e-05, "loss": 0.5616, "step": 867 }, { "epoch": 0.21969121741331307, "grad_norm": 0.14975033700466156, "learning_rate": 1.983184658106399e-05, "loss": 0.5771, "step": 868 }, { "epoch": 0.219944317894204, "grad_norm": 0.15038229525089264, "learning_rate": 1.9831409061786228e-05, "loss": 0.5907, "step": 869 }, { "epoch": 0.2201974183750949, "grad_norm": 0.1614668369293213, "learning_rate": 1.983097097889464e-05, "loss": 0.58, "step": 870 }, { "epoch": 0.22045051885598582, "grad_norm": 0.15763644874095917, "learning_rate": 1.9830532332414343e-05, "loss": 0.567, "step": 871 }, { "epoch": 0.22070361933687674, "grad_norm": 0.15375757217407227, "learning_rate": 1.9830093122370476e-05, "loss": 0.5407, "step": 872 }, { "epoch": 0.22095671981776766, "grad_norm": 0.14617526531219482, "learning_rate": 1.9829653348788228e-05, "loss": 0.5578, "step": 873 }, { "epoch": 0.22120982029865857, "grad_norm": 0.14947310090065002, "learning_rate": 1.98292130116928e-05, "loss": 0.5344, "step": 874 }, { "epoch": 0.2214629207795495, "grad_norm": 0.149801105260849, "learning_rate": 1.982877211110944e-05, "loss": 0.5652, "step": 875 }, { "epoch": 0.2217160212604404, "grad_norm": 0.1476932018995285, "learning_rate": 1.9828330647063424e-05, "loss": 0.5811, "step": 876 }, { "epoch": 0.22196912174133132, "grad_norm": 0.14732202887535095, "learning_rate": 1.9827888619580065e-05, "loss": 0.5533, "step": 877 }, { "epoch": 0.2222222222222222, "grad_norm": 0.14460918307304382, "learning_rate": 1.9827446028684695e-05, "loss": 0.5354, "step": 878 }, { "epoch": 0.22247532270311313, "grad_norm": 0.14462372660636902, "learning_rate": 1.982700287440269e-05, "loss": 0.5588, "step": 879 }, { "epoch": 0.22272842318400404, "grad_norm": 0.15274891257286072, "learning_rate": 1.9826559156759458e-05, "loss": 0.5396, "step": 880 }, { "epoch": 0.22298152366489496, "grad_norm": 0.14966580271720886, "learning_rate": 1.9826114875780434e-05, "loss": 0.5699, "step": 881 }, { "epoch": 0.22323462414578588, "grad_norm": 0.15385785698890686, "learning_rate": 1.9825670031491086e-05, "loss": 0.5526, "step": 882 }, { "epoch": 0.2234877246266768, "grad_norm": 0.15709258615970612, "learning_rate": 1.9825224623916917e-05, "loss": 0.5922, "step": 883 }, { "epoch": 0.2237408251075677, "grad_norm": 0.1467486023902893, "learning_rate": 1.9824778653083463e-05, "loss": 0.5793, "step": 884 }, { "epoch": 0.22399392558845863, "grad_norm": 0.14842136204242706, "learning_rate": 1.982433211901629e-05, "loss": 0.5616, "step": 885 }, { "epoch": 0.22424702606934954, "grad_norm": 0.1539982259273529, "learning_rate": 1.9823885021740995e-05, "loss": 0.5709, "step": 886 }, { "epoch": 0.22450012655024046, "grad_norm": 0.14926017820835114, "learning_rate": 1.9823437361283213e-05, "loss": 0.5449, "step": 887 }, { "epoch": 0.22475322703113135, "grad_norm": 0.14725536108016968, "learning_rate": 1.9822989137668603e-05, "loss": 0.6158, "step": 888 }, { "epoch": 0.22500632751202226, "grad_norm": 0.14215263724327087, "learning_rate": 1.9822540350922865e-05, "loss": 0.5622, "step": 889 }, { "epoch": 0.22525942799291318, "grad_norm": 0.16503040492534637, "learning_rate": 1.9822091001071724e-05, "loss": 0.536, "step": 890 }, { "epoch": 0.2255125284738041, "grad_norm": 0.1484951227903366, "learning_rate": 1.982164108814094e-05, "loss": 0.57, "step": 891 }, { "epoch": 0.225765628954695, "grad_norm": 0.15657170116901398, "learning_rate": 1.9821190612156307e-05, "loss": 0.5483, "step": 892 }, { "epoch": 0.22601872943558593, "grad_norm": 0.1487235128879547, "learning_rate": 1.982073957314365e-05, "loss": 0.5811, "step": 893 }, { "epoch": 0.22627182991647685, "grad_norm": 0.15387752652168274, "learning_rate": 1.9820287971128822e-05, "loss": 0.5595, "step": 894 }, { "epoch": 0.22652493039736776, "grad_norm": 0.15158569812774658, "learning_rate": 1.981983580613772e-05, "loss": 0.5584, "step": 895 }, { "epoch": 0.22677803087825868, "grad_norm": 0.15643306076526642, "learning_rate": 1.9819383078196258e-05, "loss": 0.5429, "step": 896 }, { "epoch": 0.2270311313591496, "grad_norm": 0.15127789974212646, "learning_rate": 1.9818929787330396e-05, "loss": 0.5938, "step": 897 }, { "epoch": 0.22728423184004048, "grad_norm": 0.1479039490222931, "learning_rate": 1.9818475933566116e-05, "loss": 0.5891, "step": 898 }, { "epoch": 0.2275373323209314, "grad_norm": 0.1658899486064911, "learning_rate": 1.981802151692944e-05, "loss": 0.597, "step": 899 }, { "epoch": 0.22779043280182232, "grad_norm": 0.16984491050243378, "learning_rate": 1.9817566537446415e-05, "loss": 0.5894, "step": 900 }, { "epoch": 0.22804353328271323, "grad_norm": 0.1491304188966751, "learning_rate": 1.9817110995143127e-05, "loss": 0.5737, "step": 901 }, { "epoch": 0.22829663376360415, "grad_norm": 0.14824657142162323, "learning_rate": 1.981665489004569e-05, "loss": 0.5786, "step": 902 }, { "epoch": 0.22854973424449507, "grad_norm": 0.14434686303138733, "learning_rate": 1.9816198222180252e-05, "loss": 0.5582, "step": 903 }, { "epoch": 0.22880283472538598, "grad_norm": 0.145100399851799, "learning_rate": 1.981574099157299e-05, "loss": 0.5673, "step": 904 }, { "epoch": 0.2290559352062769, "grad_norm": 0.14965581893920898, "learning_rate": 1.9815283198250125e-05, "loss": 0.5773, "step": 905 }, { "epoch": 0.22930903568716782, "grad_norm": 0.1608322560787201, "learning_rate": 1.9814824842237888e-05, "loss": 0.5536, "step": 906 }, { "epoch": 0.2295621361680587, "grad_norm": 0.1505739390850067, "learning_rate": 1.9814365923562563e-05, "loss": 0.5733, "step": 907 }, { "epoch": 0.22981523664894962, "grad_norm": 0.15027667582035065, "learning_rate": 1.981390644225046e-05, "loss": 0.5668, "step": 908 }, { "epoch": 0.23006833712984054, "grad_norm": 0.14859870076179504, "learning_rate": 1.981344639832792e-05, "loss": 0.5559, "step": 909 }, { "epoch": 0.23032143761073146, "grad_norm": 0.15014663338661194, "learning_rate": 1.9812985791821314e-05, "loss": 0.5742, "step": 910 }, { "epoch": 0.23057453809162237, "grad_norm": 0.14658929407596588, "learning_rate": 1.9812524622757047e-05, "loss": 0.5456, "step": 911 }, { "epoch": 0.2308276385725133, "grad_norm": 0.14573732018470764, "learning_rate": 1.981206289116156e-05, "loss": 0.5387, "step": 912 }, { "epoch": 0.2310807390534042, "grad_norm": 0.1504942774772644, "learning_rate": 1.981160059706132e-05, "loss": 0.5549, "step": 913 }, { "epoch": 0.23133383953429512, "grad_norm": 0.14284364879131317, "learning_rate": 1.9811137740482825e-05, "loss": 0.529, "step": 914 }, { "epoch": 0.23158694001518604, "grad_norm": 0.15270258486270905, "learning_rate": 1.9810674321452624e-05, "loss": 0.5676, "step": 915 }, { "epoch": 0.23184004049607695, "grad_norm": 0.1552465260028839, "learning_rate": 1.981021033999727e-05, "loss": 0.5408, "step": 916 }, { "epoch": 0.23209314097696784, "grad_norm": 0.15350569784641266, "learning_rate": 1.9809745796143368e-05, "loss": 0.5783, "step": 917 }, { "epoch": 0.23234624145785876, "grad_norm": 0.16560080647468567, "learning_rate": 1.980928068991755e-05, "loss": 0.5625, "step": 918 }, { "epoch": 0.23259934193874968, "grad_norm": 0.1464039832353592, "learning_rate": 1.9808815021346474e-05, "loss": 0.5525, "step": 919 }, { "epoch": 0.2328524424196406, "grad_norm": 0.14381320774555206, "learning_rate": 1.9808348790456845e-05, "loss": 0.5513, "step": 920 }, { "epoch": 0.2331055429005315, "grad_norm": 0.14749474823474884, "learning_rate": 1.980788199727538e-05, "loss": 0.5706, "step": 921 }, { "epoch": 0.23335864338142243, "grad_norm": 0.1491667926311493, "learning_rate": 1.980741464182885e-05, "loss": 0.5641, "step": 922 }, { "epoch": 0.23361174386231334, "grad_norm": 0.1418030709028244, "learning_rate": 1.980694672414404e-05, "loss": 0.527, "step": 923 }, { "epoch": 0.23386484434320426, "grad_norm": 0.14927972853183746, "learning_rate": 1.980647824424778e-05, "loss": 0.543, "step": 924 }, { "epoch": 0.23411794482409518, "grad_norm": 0.15179072320461273, "learning_rate": 1.980600920216692e-05, "loss": 0.5694, "step": 925 }, { "epoch": 0.2343710453049861, "grad_norm": 0.15006226301193237, "learning_rate": 1.9805539597928356e-05, "loss": 0.5692, "step": 926 }, { "epoch": 0.23462414578587698, "grad_norm": 0.14651387929916382, "learning_rate": 1.9805069431559007e-05, "loss": 0.5385, "step": 927 }, { "epoch": 0.2348772462667679, "grad_norm": 0.1494535505771637, "learning_rate": 1.9804598703085825e-05, "loss": 0.5311, "step": 928 }, { "epoch": 0.2351303467476588, "grad_norm": 0.15092428028583527, "learning_rate": 1.98041274125358e-05, "loss": 0.5597, "step": 929 }, { "epoch": 0.23538344722854973, "grad_norm": 0.14304505288600922, "learning_rate": 1.9803655559935943e-05, "loss": 0.5712, "step": 930 }, { "epoch": 0.23563654770944065, "grad_norm": 0.1441313475370407, "learning_rate": 1.980318314531331e-05, "loss": 0.5584, "step": 931 }, { "epoch": 0.23588964819033156, "grad_norm": 0.14732512831687927, "learning_rate": 1.9802710168694984e-05, "loss": 0.542, "step": 932 }, { "epoch": 0.23614274867122248, "grad_norm": 0.14828568696975708, "learning_rate": 1.9802236630108077e-05, "loss": 0.5549, "step": 933 }, { "epoch": 0.2363958491521134, "grad_norm": 0.14553432166576385, "learning_rate": 1.9801762529579737e-05, "loss": 0.5616, "step": 934 }, { "epoch": 0.2366489496330043, "grad_norm": 0.15267543494701385, "learning_rate": 1.9801287867137143e-05, "loss": 0.5649, "step": 935 }, { "epoch": 0.23690205011389523, "grad_norm": 0.14400067925453186, "learning_rate": 1.9800812642807508e-05, "loss": 0.5277, "step": 936 }, { "epoch": 0.23715515059478612, "grad_norm": 0.15066170692443848, "learning_rate": 1.9800336856618073e-05, "loss": 0.592, "step": 937 }, { "epoch": 0.23740825107567703, "grad_norm": 0.15186914801597595, "learning_rate": 1.9799860508596116e-05, "loss": 0.5745, "step": 938 }, { "epoch": 0.23766135155656795, "grad_norm": 0.15037445724010468, "learning_rate": 1.979938359876894e-05, "loss": 0.549, "step": 939 }, { "epoch": 0.23791445203745887, "grad_norm": 0.14689569175243378, "learning_rate": 1.9798906127163892e-05, "loss": 0.5535, "step": 940 }, { "epoch": 0.23816755251834978, "grad_norm": 0.15130499005317688, "learning_rate": 1.9798428093808343e-05, "loss": 0.5915, "step": 941 }, { "epoch": 0.2384206529992407, "grad_norm": 0.14659197628498077, "learning_rate": 1.9797949498729692e-05, "loss": 0.5625, "step": 942 }, { "epoch": 0.23867375348013162, "grad_norm": 0.14219814538955688, "learning_rate": 1.9797470341955383e-05, "loss": 0.5488, "step": 943 }, { "epoch": 0.23892685396102253, "grad_norm": 0.14325040578842163, "learning_rate": 1.9796990623512885e-05, "loss": 0.5544, "step": 944 }, { "epoch": 0.23917995444191345, "grad_norm": 0.14300473034381866, "learning_rate": 1.979651034342969e-05, "loss": 0.5794, "step": 945 }, { "epoch": 0.23943305492280437, "grad_norm": 0.14769800007343292, "learning_rate": 1.9796029501733343e-05, "loss": 0.5599, "step": 946 }, { "epoch": 0.23968615540369526, "grad_norm": 0.1453614979982376, "learning_rate": 1.9795548098451404e-05, "loss": 0.5685, "step": 947 }, { "epoch": 0.23993925588458617, "grad_norm": 0.15104912221431732, "learning_rate": 1.9795066133611468e-05, "loss": 0.5482, "step": 948 }, { "epoch": 0.2401923563654771, "grad_norm": 0.14418837428092957, "learning_rate": 1.9794583607241168e-05, "loss": 0.5535, "step": 949 }, { "epoch": 0.240445456846368, "grad_norm": 0.1435074359178543, "learning_rate": 1.979410051936817e-05, "loss": 0.5507, "step": 950 }, { "epoch": 0.24069855732725892, "grad_norm": 0.14348246157169342, "learning_rate": 1.979361687002016e-05, "loss": 0.5475, "step": 951 }, { "epoch": 0.24095165780814984, "grad_norm": 0.16258108615875244, "learning_rate": 1.9793132659224875e-05, "loss": 0.5258, "step": 952 }, { "epoch": 0.24120475828904075, "grad_norm": 0.14690545201301575, "learning_rate": 1.9792647887010066e-05, "loss": 0.5423, "step": 953 }, { "epoch": 0.24145785876993167, "grad_norm": 0.15458691120147705, "learning_rate": 1.9792162553403527e-05, "loss": 0.5876, "step": 954 }, { "epoch": 0.2417109592508226, "grad_norm": 0.14689841866493225, "learning_rate": 1.979167665843308e-05, "loss": 0.5349, "step": 955 }, { "epoch": 0.24196405973171348, "grad_norm": 0.14967821538448334, "learning_rate": 1.9791190202126578e-05, "loss": 0.5568, "step": 956 }, { "epoch": 0.2422171602126044, "grad_norm": 0.1518346518278122, "learning_rate": 1.9790703184511916e-05, "loss": 0.5653, "step": 957 }, { "epoch": 0.2424702606934953, "grad_norm": 0.14848634600639343, "learning_rate": 1.9790215605617007e-05, "loss": 0.5578, "step": 958 }, { "epoch": 0.24272336117438623, "grad_norm": 0.14639200270175934, "learning_rate": 1.97897274654698e-05, "loss": 0.5557, "step": 959 }, { "epoch": 0.24297646165527714, "grad_norm": 0.15468524396419525, "learning_rate": 1.9789238764098292e-05, "loss": 0.5588, "step": 960 }, { "epoch": 0.24322956213616806, "grad_norm": 0.1442486047744751, "learning_rate": 1.9788749501530488e-05, "loss": 0.5539, "step": 961 }, { "epoch": 0.24348266261705898, "grad_norm": 0.1456901878118515, "learning_rate": 1.9788259677794436e-05, "loss": 0.5666, "step": 962 }, { "epoch": 0.2437357630979499, "grad_norm": 0.1506296843290329, "learning_rate": 1.9787769292918222e-05, "loss": 0.5503, "step": 963 }, { "epoch": 0.2439888635788408, "grad_norm": 0.14512263238430023, "learning_rate": 1.9787278346929956e-05, "loss": 0.5555, "step": 964 }, { "epoch": 0.24424196405973173, "grad_norm": 0.14852070808410645, "learning_rate": 1.9786786839857785e-05, "loss": 0.5746, "step": 965 }, { "epoch": 0.24449506454062261, "grad_norm": 0.14244824647903442, "learning_rate": 1.9786294771729886e-05, "loss": 0.5293, "step": 966 }, { "epoch": 0.24474816502151353, "grad_norm": 0.16617433726787567, "learning_rate": 1.9785802142574464e-05, "loss": 0.6001, "step": 967 }, { "epoch": 0.24500126550240445, "grad_norm": 0.15356621146202087, "learning_rate": 1.9785308952419764e-05, "loss": 0.5785, "step": 968 }, { "epoch": 0.24525436598329536, "grad_norm": 0.17322207987308502, "learning_rate": 1.9784815201294058e-05, "loss": 0.5471, "step": 969 }, { "epoch": 0.24550746646418628, "grad_norm": 0.14599476754665375, "learning_rate": 1.9784320889225655e-05, "loss": 0.5515, "step": 970 }, { "epoch": 0.2457605669450772, "grad_norm": 0.15090207755565643, "learning_rate": 1.978382601624289e-05, "loss": 0.5684, "step": 971 }, { "epoch": 0.2460136674259681, "grad_norm": 0.1482008546590805, "learning_rate": 1.9783330582374128e-05, "loss": 0.5457, "step": 972 }, { "epoch": 0.24626676790685903, "grad_norm": 0.17851801216602325, "learning_rate": 1.9782834587647782e-05, "loss": 0.5695, "step": 973 }, { "epoch": 0.24651986838774995, "grad_norm": 0.14859318733215332, "learning_rate": 1.9782338032092282e-05, "loss": 0.5883, "step": 974 }, { "epoch": 0.24677296886864086, "grad_norm": 0.15535728633403778, "learning_rate": 1.978184091573609e-05, "loss": 0.5583, "step": 975 }, { "epoch": 0.24702606934953175, "grad_norm": 0.15031661093235016, "learning_rate": 1.9781343238607708e-05, "loss": 0.5608, "step": 976 }, { "epoch": 0.24727916983042267, "grad_norm": 0.15604668855667114, "learning_rate": 1.978084500073567e-05, "loss": 0.5537, "step": 977 }, { "epoch": 0.24753227031131358, "grad_norm": 0.14635708928108215, "learning_rate": 1.9780346202148533e-05, "loss": 0.5273, "step": 978 }, { "epoch": 0.2477853707922045, "grad_norm": 0.1487453132867813, "learning_rate": 1.9779846842874895e-05, "loss": 0.5561, "step": 979 }, { "epoch": 0.24803847127309542, "grad_norm": 0.15110675990581512, "learning_rate": 1.9779346922943384e-05, "loss": 0.5223, "step": 980 }, { "epoch": 0.24829157175398633, "grad_norm": 0.15664315223693848, "learning_rate": 1.9778846442382656e-05, "loss": 0.5848, "step": 981 }, { "epoch": 0.24854467223487725, "grad_norm": 0.15137054026126862, "learning_rate": 1.9778345401221407e-05, "loss": 0.5318, "step": 982 }, { "epoch": 0.24879777271576817, "grad_norm": 0.14949896931648254, "learning_rate": 1.9777843799488354e-05, "loss": 0.5627, "step": 983 }, { "epoch": 0.24905087319665908, "grad_norm": 0.15440170466899872, "learning_rate": 1.9777341637212264e-05, "loss": 0.5306, "step": 984 }, { "epoch": 0.24930397367755, "grad_norm": 0.14904992282390594, "learning_rate": 1.9776838914421913e-05, "loss": 0.5559, "step": 985 }, { "epoch": 0.2495570741584409, "grad_norm": 0.14369764924049377, "learning_rate": 1.9776335631146128e-05, "loss": 0.5448, "step": 986 }, { "epoch": 0.2498101746393318, "grad_norm": 0.15963445603847504, "learning_rate": 1.9775831787413757e-05, "loss": 0.5561, "step": 987 }, { "epoch": 0.25006327512022275, "grad_norm": 0.15341433882713318, "learning_rate": 1.977532738325369e-05, "loss": 0.5736, "step": 988 }, { "epoch": 0.25031637560111364, "grad_norm": 0.14597854018211365, "learning_rate": 1.977482241869484e-05, "loss": 0.5471, "step": 989 }, { "epoch": 0.2505694760820046, "grad_norm": 0.15139272809028625, "learning_rate": 1.9774316893766152e-05, "loss": 0.5557, "step": 990 }, { "epoch": 0.25082257656289547, "grad_norm": 0.14927662909030914, "learning_rate": 1.9773810808496612e-05, "loss": 0.5761, "step": 991 }, { "epoch": 0.25107567704378636, "grad_norm": 0.14922501146793365, "learning_rate": 1.977330416291523e-05, "loss": 0.5638, "step": 992 }, { "epoch": 0.2513287775246773, "grad_norm": 0.16757948696613312, "learning_rate": 1.9772796957051055e-05, "loss": 0.555, "step": 993 }, { "epoch": 0.2515818780055682, "grad_norm": 0.15029583871364594, "learning_rate": 1.977228919093316e-05, "loss": 0.545, "step": 994 }, { "epoch": 0.25183497848645914, "grad_norm": 0.14153912663459778, "learning_rate": 1.9771780864590654e-05, "loss": 0.5454, "step": 995 }, { "epoch": 0.25208807896735, "grad_norm": 0.147403284907341, "learning_rate": 1.9771271978052677e-05, "loss": 0.6348, "step": 996 }, { "epoch": 0.25234117944824097, "grad_norm": 0.15162605047225952, "learning_rate": 1.977076253134841e-05, "loss": 0.5687, "step": 997 }, { "epoch": 0.25259427992913186, "grad_norm": 0.1470058113336563, "learning_rate": 1.977025252450705e-05, "loss": 0.5339, "step": 998 }, { "epoch": 0.2528473804100228, "grad_norm": 0.15223732590675354, "learning_rate": 1.976974195755784e-05, "loss": 0.5607, "step": 999 }, { "epoch": 0.2531004808909137, "grad_norm": 0.14976049959659576, "learning_rate": 1.9769230830530044e-05, "loss": 0.5582, "step": 1000 }, { "epoch": 0.2533535813718046, "grad_norm": 0.14229172468185425, "learning_rate": 1.976871914345297e-05, "loss": 0.5762, "step": 1001 }, { "epoch": 0.2536066818526955, "grad_norm": 0.147952601313591, "learning_rate": 1.9768206896355945e-05, "loss": 0.5771, "step": 1002 }, { "epoch": 0.2538597823335864, "grad_norm": 0.14495033025741577, "learning_rate": 1.9767694089268346e-05, "loss": 0.5754, "step": 1003 }, { "epoch": 0.25411288281447736, "grad_norm": 0.14356327056884766, "learning_rate": 1.976718072221956e-05, "loss": 0.5333, "step": 1004 }, { "epoch": 0.25436598329536825, "grad_norm": 0.1483432650566101, "learning_rate": 1.9766666795239026e-05, "loss": 0.5369, "step": 1005 }, { "epoch": 0.2546190837762592, "grad_norm": 0.14932604134082794, "learning_rate": 1.9766152308356198e-05, "loss": 0.5582, "step": 1006 }, { "epoch": 0.2548721842571501, "grad_norm": 0.14025354385375977, "learning_rate": 1.9765637261600577e-05, "loss": 0.5303, "step": 1007 }, { "epoch": 0.255125284738041, "grad_norm": 0.14725551009178162, "learning_rate": 1.9765121655001683e-05, "loss": 0.5725, "step": 1008 }, { "epoch": 0.2553783852189319, "grad_norm": 0.15981052815914154, "learning_rate": 1.9764605488589083e-05, "loss": 0.5395, "step": 1009 }, { "epoch": 0.2556314856998228, "grad_norm": 0.14977654814720154, "learning_rate": 1.976408876239236e-05, "loss": 0.5677, "step": 1010 }, { "epoch": 0.25588458618071375, "grad_norm": 0.14924289286136627, "learning_rate": 1.9763571476441144e-05, "loss": 0.5729, "step": 1011 }, { "epoch": 0.25613768666160464, "grad_norm": 0.1671869158744812, "learning_rate": 1.9763053630765085e-05, "loss": 0.5458, "step": 1012 }, { "epoch": 0.2563907871424956, "grad_norm": 0.22122712433338165, "learning_rate": 1.976253522539387e-05, "loss": 0.5681, "step": 1013 }, { "epoch": 0.25664388762338647, "grad_norm": 0.18188539147377014, "learning_rate": 1.9762016260357222e-05, "loss": 0.5387, "step": 1014 }, { "epoch": 0.2568969881042774, "grad_norm": 0.149360790848732, "learning_rate": 1.9761496735684886e-05, "loss": 0.5721, "step": 1015 }, { "epoch": 0.2571500885851683, "grad_norm": 0.15543420612812042, "learning_rate": 1.976097665140665e-05, "loss": 0.5607, "step": 1016 }, { "epoch": 0.25740318906605925, "grad_norm": 0.1418234258890152, "learning_rate": 1.976045600755233e-05, "loss": 0.5512, "step": 1017 }, { "epoch": 0.25765628954695013, "grad_norm": 0.15278293192386627, "learning_rate": 1.975993480415177e-05, "loss": 0.5859, "step": 1018 }, { "epoch": 0.2579093900278411, "grad_norm": 0.14391061663627625, "learning_rate": 1.975941304123485e-05, "loss": 0.5694, "step": 1019 }, { "epoch": 0.25816249050873197, "grad_norm": 0.14631696045398712, "learning_rate": 1.975889071883148e-05, "loss": 0.5629, "step": 1020 }, { "epoch": 0.25841559098962286, "grad_norm": 0.1435028314590454, "learning_rate": 1.975836783697161e-05, "loss": 0.5582, "step": 1021 }, { "epoch": 0.2586686914705138, "grad_norm": 0.15036992728710175, "learning_rate": 1.975784439568521e-05, "loss": 0.5799, "step": 1022 }, { "epoch": 0.2589217919514047, "grad_norm": 0.17731989920139313, "learning_rate": 1.9757320395002288e-05, "loss": 0.5387, "step": 1023 }, { "epoch": 0.25917489243229563, "grad_norm": 0.13910239934921265, "learning_rate": 1.9756795834952892e-05, "loss": 0.5487, "step": 1024 }, { "epoch": 0.2594279929131865, "grad_norm": 0.1503012776374817, "learning_rate": 1.975627071556708e-05, "loss": 0.5585, "step": 1025 }, { "epoch": 0.25968109339407747, "grad_norm": 0.14616143703460693, "learning_rate": 1.9755745036874967e-05, "loss": 0.5525, "step": 1026 }, { "epoch": 0.25993419387496836, "grad_norm": 0.14453400671482086, "learning_rate": 1.9755218798906683e-05, "loss": 0.574, "step": 1027 }, { "epoch": 0.2601872943558593, "grad_norm": 0.14514601230621338, "learning_rate": 1.97546920016924e-05, "loss": 0.5384, "step": 1028 }, { "epoch": 0.2604403948367502, "grad_norm": 0.14687170088291168, "learning_rate": 1.9754164645262316e-05, "loss": 0.5624, "step": 1029 }, { "epoch": 0.2606934953176411, "grad_norm": 0.1529356688261032, "learning_rate": 1.9753636729646665e-05, "loss": 0.6067, "step": 1030 }, { "epoch": 0.260946595798532, "grad_norm": 0.16056649386882782, "learning_rate": 1.975310825487571e-05, "loss": 0.5768, "step": 1031 }, { "epoch": 0.2611996962794229, "grad_norm": 0.1485505998134613, "learning_rate": 1.9752579220979746e-05, "loss": 0.5675, "step": 1032 }, { "epoch": 0.26145279676031385, "grad_norm": 0.15249554812908173, "learning_rate": 1.9752049627989106e-05, "loss": 0.5823, "step": 1033 }, { "epoch": 0.26170589724120474, "grad_norm": 0.15143629908561707, "learning_rate": 1.9751519475934143e-05, "loss": 0.6127, "step": 1034 }, { "epoch": 0.2619589977220957, "grad_norm": 0.16558900475502014, "learning_rate": 1.9750988764845257e-05, "loss": 0.5608, "step": 1035 }, { "epoch": 0.2622120982029866, "grad_norm": 0.15004250407218933, "learning_rate": 1.975045749475287e-05, "loss": 0.5455, "step": 1036 }, { "epoch": 0.2624651986838775, "grad_norm": 0.144332155585289, "learning_rate": 1.9749925665687436e-05, "loss": 0.5497, "step": 1037 }, { "epoch": 0.2627182991647684, "grad_norm": 0.14648112654685974, "learning_rate": 1.9749393277679445e-05, "loss": 0.5332, "step": 1038 }, { "epoch": 0.26297139964565935, "grad_norm": 0.1481088399887085, "learning_rate": 1.9748860330759417e-05, "loss": 0.5823, "step": 1039 }, { "epoch": 0.26322450012655024, "grad_norm": 0.1628430187702179, "learning_rate": 1.974832682495791e-05, "loss": 0.5508, "step": 1040 }, { "epoch": 0.26347760060744113, "grad_norm": 0.14736312627792358, "learning_rate": 1.9747792760305504e-05, "loss": 0.5527, "step": 1041 }, { "epoch": 0.2637307010883321, "grad_norm": 0.13812898099422455, "learning_rate": 1.974725813683281e-05, "loss": 0.5156, "step": 1042 }, { "epoch": 0.26398380156922296, "grad_norm": 0.16094870865345, "learning_rate": 1.974672295457049e-05, "loss": 0.5695, "step": 1043 }, { "epoch": 0.2642369020501139, "grad_norm": 0.1549401581287384, "learning_rate": 1.974618721354922e-05, "loss": 0.5764, "step": 1044 }, { "epoch": 0.2644900025310048, "grad_norm": 0.1427125632762909, "learning_rate": 1.9745650913799706e-05, "loss": 0.5509, "step": 1045 }, { "epoch": 0.26474310301189574, "grad_norm": 0.1518612951040268, "learning_rate": 1.97451140553527e-05, "loss": 0.5554, "step": 1046 }, { "epoch": 0.26499620349278663, "grad_norm": 0.149056077003479, "learning_rate": 1.9744576638238975e-05, "loss": 0.5316, "step": 1047 }, { "epoch": 0.2652493039736776, "grad_norm": 0.1529719978570938, "learning_rate": 1.9744038662489344e-05, "loss": 0.5777, "step": 1048 }, { "epoch": 0.26550240445456846, "grad_norm": 0.15761038661003113, "learning_rate": 1.9743500128134646e-05, "loss": 0.559, "step": 1049 }, { "epoch": 0.26575550493545935, "grad_norm": 0.16031967103481293, "learning_rate": 1.9742961035205753e-05, "loss": 0.5575, "step": 1050 }, { "epoch": 0.2660086054163503, "grad_norm": 0.1591987907886505, "learning_rate": 1.9742421383733572e-05, "loss": 0.5489, "step": 1051 }, { "epoch": 0.2662617058972412, "grad_norm": 0.14696821570396423, "learning_rate": 1.974188117374904e-05, "loss": 0.5625, "step": 1052 }, { "epoch": 0.26651480637813213, "grad_norm": 0.1537933647632599, "learning_rate": 1.9741340405283123e-05, "loss": 0.5595, "step": 1053 }, { "epoch": 0.266767906859023, "grad_norm": 0.14726348221302032, "learning_rate": 1.9740799078366827e-05, "loss": 0.5377, "step": 1054 }, { "epoch": 0.26702100733991396, "grad_norm": 0.14925776422023773, "learning_rate": 1.974025719303118e-05, "loss": 0.5792, "step": 1055 }, { "epoch": 0.26727410782080485, "grad_norm": 0.14656715095043182, "learning_rate": 1.9739714749307248e-05, "loss": 0.5509, "step": 1056 }, { "epoch": 0.2675272083016958, "grad_norm": 0.14511851966381073, "learning_rate": 1.973917174722613e-05, "loss": 0.541, "step": 1057 }, { "epoch": 0.2677803087825867, "grad_norm": 0.15105004608631134, "learning_rate": 1.973862818681896e-05, "loss": 0.5634, "step": 1058 }, { "epoch": 0.2680334092634776, "grad_norm": 0.15590152144432068, "learning_rate": 1.9738084068116888e-05, "loss": 0.5651, "step": 1059 }, { "epoch": 0.2682865097443685, "grad_norm": 0.14832177758216858, "learning_rate": 1.973753939115112e-05, "loss": 0.564, "step": 1060 }, { "epoch": 0.2685396102252594, "grad_norm": 0.1544780135154724, "learning_rate": 1.9736994155952868e-05, "loss": 0.5605, "step": 1061 }, { "epoch": 0.26879271070615035, "grad_norm": 0.1504780650138855, "learning_rate": 1.97364483625534e-05, "loss": 0.5283, "step": 1062 }, { "epoch": 0.26904581118704124, "grad_norm": 0.14311392605304718, "learning_rate": 1.9735902010983995e-05, "loss": 0.5329, "step": 1063 }, { "epoch": 0.2692989116679322, "grad_norm": 0.14662767946720123, "learning_rate": 1.9735355101275987e-05, "loss": 0.5584, "step": 1064 }, { "epoch": 0.2695520121488231, "grad_norm": 0.17058195173740387, "learning_rate": 1.9734807633460717e-05, "loss": 0.5709, "step": 1065 }, { "epoch": 0.269805112629714, "grad_norm": 0.14818315207958221, "learning_rate": 1.973425960756958e-05, "loss": 0.5637, "step": 1066 }, { "epoch": 0.2700582131106049, "grad_norm": 0.15794546902179718, "learning_rate": 1.9733711023633985e-05, "loss": 0.5734, "step": 1067 }, { "epoch": 0.27031131359149585, "grad_norm": 0.1491701602935791, "learning_rate": 1.9733161881685383e-05, "loss": 0.5565, "step": 1068 }, { "epoch": 0.27056441407238674, "grad_norm": 0.15116646885871887, "learning_rate": 1.973261218175526e-05, "loss": 0.5499, "step": 1069 }, { "epoch": 0.2708175145532776, "grad_norm": 0.1520608812570572, "learning_rate": 1.9732061923875126e-05, "loss": 0.5686, "step": 1070 }, { "epoch": 0.27107061503416857, "grad_norm": 0.1513378620147705, "learning_rate": 1.973151110807652e-05, "loss": 0.566, "step": 1071 }, { "epoch": 0.27132371551505946, "grad_norm": 0.15130463242530823, "learning_rate": 1.9730959734391032e-05, "loss": 0.572, "step": 1072 }, { "epoch": 0.2715768159959504, "grad_norm": 0.14602863788604736, "learning_rate": 1.973040780285026e-05, "loss": 0.5447, "step": 1073 }, { "epoch": 0.2718299164768413, "grad_norm": 0.1462998390197754, "learning_rate": 1.9729855313485853e-05, "loss": 0.5335, "step": 1074 }, { "epoch": 0.27208301695773224, "grad_norm": 0.14722299575805664, "learning_rate": 1.972930226632948e-05, "loss": 0.5566, "step": 1075 }, { "epoch": 0.2723361174386231, "grad_norm": 0.14984026551246643, "learning_rate": 1.972874866141284e-05, "loss": 0.5535, "step": 1076 }, { "epoch": 0.27258921791951407, "grad_norm": 0.1488526165485382, "learning_rate": 1.9728194498767682e-05, "loss": 0.5658, "step": 1077 }, { "epoch": 0.27284231840040496, "grad_norm": 0.15000055730342865, "learning_rate": 1.972763977842577e-05, "loss": 0.5566, "step": 1078 }, { "epoch": 0.27309541888129585, "grad_norm": 0.14696164429187775, "learning_rate": 1.9727084500418902e-05, "loss": 0.5505, "step": 1079 }, { "epoch": 0.2733485193621868, "grad_norm": 0.14558060467243195, "learning_rate": 1.9726528664778916e-05, "loss": 0.5552, "step": 1080 }, { "epoch": 0.2736016198430777, "grad_norm": 0.14551407098770142, "learning_rate": 1.972597227153767e-05, "loss": 0.5329, "step": 1081 }, { "epoch": 0.2738547203239686, "grad_norm": 0.14947450160980225, "learning_rate": 1.9725415320727067e-05, "loss": 0.5746, "step": 1082 }, { "epoch": 0.2741078208048595, "grad_norm": 0.14890162646770477, "learning_rate": 1.972485781237903e-05, "loss": 0.5518, "step": 1083 }, { "epoch": 0.27436092128575046, "grad_norm": 0.15217174589633942, "learning_rate": 1.972429974652553e-05, "loss": 0.5623, "step": 1084 }, { "epoch": 0.27461402176664135, "grad_norm": 0.1404881477355957, "learning_rate": 1.9723741123198548e-05, "loss": 0.5472, "step": 1085 }, { "epoch": 0.2748671222475323, "grad_norm": 0.14228922128677368, "learning_rate": 1.9723181942430117e-05, "loss": 0.5433, "step": 1086 }, { "epoch": 0.2751202227284232, "grad_norm": 0.1508197784423828, "learning_rate": 1.9722622204252285e-05, "loss": 0.5471, "step": 1087 }, { "epoch": 0.2753733232093141, "grad_norm": 0.1493370085954666, "learning_rate": 1.972206190869715e-05, "loss": 0.5522, "step": 1088 }, { "epoch": 0.275626423690205, "grad_norm": 0.1405772864818573, "learning_rate": 1.972150105579683e-05, "loss": 0.5476, "step": 1089 }, { "epoch": 0.2758795241710959, "grad_norm": 0.1434200257062912, "learning_rate": 1.9720939645583477e-05, "loss": 0.5685, "step": 1090 }, { "epoch": 0.27613262465198685, "grad_norm": 0.15157896280288696, "learning_rate": 1.972037767808927e-05, "loss": 0.5849, "step": 1091 }, { "epoch": 0.27638572513287774, "grad_norm": 0.15583936870098114, "learning_rate": 1.971981515334643e-05, "loss": 0.5486, "step": 1092 }, { "epoch": 0.2766388256137687, "grad_norm": 0.1595613956451416, "learning_rate": 1.9719252071387213e-05, "loss": 0.5666, "step": 1093 }, { "epoch": 0.27689192609465957, "grad_norm": 0.14364835619926453, "learning_rate": 1.9718688432243883e-05, "loss": 0.5715, "step": 1094 }, { "epoch": 0.2771450265755505, "grad_norm": 0.14432287216186523, "learning_rate": 1.9718124235948765e-05, "loss": 0.5561, "step": 1095 }, { "epoch": 0.2773981270564414, "grad_norm": 0.1456945687532425, "learning_rate": 1.97175594825342e-05, "loss": 0.5504, "step": 1096 }, { "epoch": 0.27765122753733235, "grad_norm": 0.14358799159526825, "learning_rate": 1.971699417203256e-05, "loss": 0.5498, "step": 1097 }, { "epoch": 0.27790432801822323, "grad_norm": 0.15574151277542114, "learning_rate": 1.9716428304476255e-05, "loss": 0.5538, "step": 1098 }, { "epoch": 0.2781574284991141, "grad_norm": 0.14136652648448944, "learning_rate": 1.971586187989773e-05, "loss": 0.5256, "step": 1099 }, { "epoch": 0.27841052898000507, "grad_norm": 0.14398375153541565, "learning_rate": 1.9715294898329453e-05, "loss": 0.5811, "step": 1100 }, { "epoch": 0.27866362946089596, "grad_norm": 0.15071871876716614, "learning_rate": 1.9714727359803926e-05, "loss": 0.5514, "step": 1101 }, { "epoch": 0.2789167299417869, "grad_norm": 0.14579877257347107, "learning_rate": 1.971415926435369e-05, "loss": 0.5469, "step": 1102 }, { "epoch": 0.2791698304226778, "grad_norm": 0.14999797940254211, "learning_rate": 1.9713590612011306e-05, "loss": 0.5769, "step": 1103 }, { "epoch": 0.27942293090356873, "grad_norm": 0.14470462501049042, "learning_rate": 1.9713021402809378e-05, "loss": 0.5819, "step": 1104 }, { "epoch": 0.2796760313844596, "grad_norm": 0.1511867791414261, "learning_rate": 1.9712451636780536e-05, "loss": 0.5677, "step": 1105 }, { "epoch": 0.27992913186535057, "grad_norm": 0.1428070068359375, "learning_rate": 1.9711881313957442e-05, "loss": 0.5517, "step": 1106 }, { "epoch": 0.28018223234624146, "grad_norm": 0.1462501436471939, "learning_rate": 1.97113104343728e-05, "loss": 0.5535, "step": 1107 }, { "epoch": 0.28043533282713234, "grad_norm": 0.14507222175598145, "learning_rate": 1.9710738998059326e-05, "loss": 0.5755, "step": 1108 }, { "epoch": 0.2806884333080233, "grad_norm": 0.15307988226413727, "learning_rate": 1.9710167005049786e-05, "loss": 0.5374, "step": 1109 }, { "epoch": 0.2809415337889142, "grad_norm": 0.14656472206115723, "learning_rate": 1.970959445537697e-05, "loss": 0.5394, "step": 1110 }, { "epoch": 0.2811946342698051, "grad_norm": 0.3027561604976654, "learning_rate": 1.9709021349073697e-05, "loss": 0.5404, "step": 1111 }, { "epoch": 0.281447734750696, "grad_norm": 0.14776207506656647, "learning_rate": 1.970844768617283e-05, "loss": 0.5317, "step": 1112 }, { "epoch": 0.28170083523158695, "grad_norm": 0.14800283312797546, "learning_rate": 1.9707873466707247e-05, "loss": 0.5876, "step": 1113 }, { "epoch": 0.28195393571247784, "grad_norm": 0.14399170875549316, "learning_rate": 1.9707298690709874e-05, "loss": 0.551, "step": 1114 }, { "epoch": 0.2822070361933688, "grad_norm": 0.1502523124217987, "learning_rate": 1.9706723358213654e-05, "loss": 0.5611, "step": 1115 }, { "epoch": 0.2824601366742597, "grad_norm": 0.15089723467826843, "learning_rate": 1.9706147469251577e-05, "loss": 0.5505, "step": 1116 }, { "epoch": 0.2827132371551506, "grad_norm": 0.13856129348278046, "learning_rate": 1.9705571023856656e-05, "loss": 0.5112, "step": 1117 }, { "epoch": 0.2829663376360415, "grad_norm": 0.14749446511268616, "learning_rate": 1.9704994022061938e-05, "loss": 0.5543, "step": 1118 }, { "epoch": 0.2832194381169324, "grad_norm": 0.1500622034072876, "learning_rate": 1.970441646390049e-05, "loss": 0.5463, "step": 1119 }, { "epoch": 0.28347253859782334, "grad_norm": 0.14998632669448853, "learning_rate": 1.970383834940544e-05, "loss": 0.5653, "step": 1120 }, { "epoch": 0.28372563907871423, "grad_norm": 0.14342360198497772, "learning_rate": 1.970325967860992e-05, "loss": 0.5488, "step": 1121 }, { "epoch": 0.2839787395596052, "grad_norm": 0.16454951465129852, "learning_rate": 1.9702680451547103e-05, "loss": 0.562, "step": 1122 }, { "epoch": 0.28423184004049606, "grad_norm": 0.14895156025886536, "learning_rate": 1.97021006682502e-05, "loss": 0.5917, "step": 1123 }, { "epoch": 0.284484940521387, "grad_norm": 0.15073780715465546, "learning_rate": 1.9701520328752446e-05, "loss": 0.5855, "step": 1124 }, { "epoch": 0.2847380410022779, "grad_norm": 0.15511606633663177, "learning_rate": 1.9700939433087112e-05, "loss": 0.5347, "step": 1125 }, { "epoch": 0.28499114148316884, "grad_norm": 0.14976346492767334, "learning_rate": 1.9700357981287498e-05, "loss": 0.5744, "step": 1126 }, { "epoch": 0.28524424196405973, "grad_norm": 0.1468544751405716, "learning_rate": 1.9699775973386935e-05, "loss": 0.5385, "step": 1127 }, { "epoch": 0.2854973424449506, "grad_norm": 0.15148510038852692, "learning_rate": 1.9699193409418793e-05, "loss": 0.5484, "step": 1128 }, { "epoch": 0.28575044292584156, "grad_norm": 0.1473529189825058, "learning_rate": 1.9698610289416466e-05, "loss": 0.5308, "step": 1129 }, { "epoch": 0.28600354340673245, "grad_norm": 0.1504306197166443, "learning_rate": 1.9698026613413384e-05, "loss": 0.5399, "step": 1130 }, { "epoch": 0.2862566438876234, "grad_norm": 0.15331873297691345, "learning_rate": 1.969744238144301e-05, "loss": 0.5695, "step": 1131 }, { "epoch": 0.2865097443685143, "grad_norm": 0.17417678236961365, "learning_rate": 1.9696857593538836e-05, "loss": 0.5674, "step": 1132 }, { "epoch": 0.28676284484940523, "grad_norm": 0.1782664656639099, "learning_rate": 1.9696272249734383e-05, "loss": 0.5739, "step": 1133 }, { "epoch": 0.2870159453302961, "grad_norm": 0.20433051884174347, "learning_rate": 1.969568635006321e-05, "loss": 0.5494, "step": 1134 }, { "epoch": 0.28726904581118706, "grad_norm": 0.16913430392742157, "learning_rate": 1.9695099894558907e-05, "loss": 0.5318, "step": 1135 }, { "epoch": 0.28752214629207795, "grad_norm": 0.14963854849338531, "learning_rate": 1.9694512883255094e-05, "loss": 0.5509, "step": 1136 }, { "epoch": 0.2877752467729689, "grad_norm": 0.1432947814464569, "learning_rate": 1.969392531618542e-05, "loss": 0.5402, "step": 1137 }, { "epoch": 0.2880283472538598, "grad_norm": 0.14945542812347412, "learning_rate": 1.969333719338357e-05, "loss": 0.5601, "step": 1138 }, { "epoch": 0.2882814477347507, "grad_norm": 0.1545867770910263, "learning_rate": 1.9692748514883258e-05, "loss": 0.5447, "step": 1139 }, { "epoch": 0.2885345482156416, "grad_norm": 0.14961642026901245, "learning_rate": 1.9692159280718237e-05, "loss": 0.5686, "step": 1140 }, { "epoch": 0.2887876486965325, "grad_norm": 0.15187720954418182, "learning_rate": 1.9691569490922283e-05, "loss": 0.5876, "step": 1141 }, { "epoch": 0.28904074917742345, "grad_norm": 0.14029815793037415, "learning_rate": 1.9690979145529213e-05, "loss": 0.5591, "step": 1142 }, { "epoch": 0.28929384965831434, "grad_norm": 0.14870896935462952, "learning_rate": 1.969038824457286e-05, "loss": 0.5839, "step": 1143 }, { "epoch": 0.2895469501392053, "grad_norm": 0.146173894405365, "learning_rate": 1.9689796788087106e-05, "loss": 0.5531, "step": 1144 }, { "epoch": 0.2898000506200962, "grad_norm": 0.14388002455234528, "learning_rate": 1.968920477610586e-05, "loss": 0.5314, "step": 1145 }, { "epoch": 0.2900531511009871, "grad_norm": 0.14754292368888855, "learning_rate": 1.9688612208663052e-05, "loss": 0.5731, "step": 1146 }, { "epoch": 0.290306251581878, "grad_norm": 0.14382413029670715, "learning_rate": 1.968801908579266e-05, "loss": 0.541, "step": 1147 }, { "epoch": 0.2905593520627689, "grad_norm": 0.1487230360507965, "learning_rate": 1.9687425407528685e-05, "loss": 0.5698, "step": 1148 }, { "epoch": 0.29081245254365984, "grad_norm": 0.15192517638206482, "learning_rate": 1.9686831173905162e-05, "loss": 0.5622, "step": 1149 }, { "epoch": 0.2910655530245507, "grad_norm": 0.14377538859844208, "learning_rate": 1.9686236384956156e-05, "loss": 0.5611, "step": 1150 }, { "epoch": 0.29131865350544167, "grad_norm": 0.15064801275730133, "learning_rate": 1.9685641040715765e-05, "loss": 0.5642, "step": 1151 }, { "epoch": 0.29157175398633256, "grad_norm": 0.14578548073768616, "learning_rate": 1.9685045141218114e-05, "loss": 0.5795, "step": 1152 }, { "epoch": 0.2918248544672235, "grad_norm": 0.20898699760437012, "learning_rate": 1.9684448686497377e-05, "loss": 0.5623, "step": 1153 }, { "epoch": 0.2920779549481144, "grad_norm": 0.14994247257709503, "learning_rate": 1.9683851676587732e-05, "loss": 0.5562, "step": 1154 }, { "epoch": 0.29233105542900534, "grad_norm": 0.146097332239151, "learning_rate": 1.9683254111523417e-05, "loss": 0.5474, "step": 1155 }, { "epoch": 0.2925841559098962, "grad_norm": 0.1490073949098587, "learning_rate": 1.968265599133868e-05, "loss": 0.5753, "step": 1156 }, { "epoch": 0.2928372563907871, "grad_norm": 0.14542317390441895, "learning_rate": 1.968205731606782e-05, "loss": 0.5386, "step": 1157 }, { "epoch": 0.29309035687167806, "grad_norm": 0.15466979146003723, "learning_rate": 1.9681458085745148e-05, "loss": 0.5305, "step": 1158 }, { "epoch": 0.29334345735256895, "grad_norm": 0.13925255835056305, "learning_rate": 1.9680858300405027e-05, "loss": 0.5257, "step": 1159 }, { "epoch": 0.2935965578334599, "grad_norm": 0.14682196080684662, "learning_rate": 1.9680257960081828e-05, "loss": 0.5431, "step": 1160 }, { "epoch": 0.2938496583143508, "grad_norm": 0.15029916167259216, "learning_rate": 1.9679657064809977e-05, "loss": 0.5535, "step": 1161 }, { "epoch": 0.2941027587952417, "grad_norm": 0.14853662252426147, "learning_rate": 1.9679055614623918e-05, "loss": 0.5311, "step": 1162 }, { "epoch": 0.2943558592761326, "grad_norm": 0.1501016914844513, "learning_rate": 1.9678453609558136e-05, "loss": 0.5555, "step": 1163 }, { "epoch": 0.29460895975702356, "grad_norm": 0.15274439752101898, "learning_rate": 1.967785104964713e-05, "loss": 0.535, "step": 1164 }, { "epoch": 0.29486206023791445, "grad_norm": 0.18682987987995148, "learning_rate": 1.967724793492546e-05, "loss": 0.5742, "step": 1165 }, { "epoch": 0.2951151607188054, "grad_norm": 0.14630046486854553, "learning_rate": 1.9676644265427692e-05, "loss": 0.5471, "step": 1166 }, { "epoch": 0.2953682611996963, "grad_norm": 1.4593929052352905, "learning_rate": 1.967604004118844e-05, "loss": 0.5525, "step": 1167 }, { "epoch": 0.29562136168058717, "grad_norm": 0.1469281017780304, "learning_rate": 1.967543526224233e-05, "loss": 0.5748, "step": 1168 }, { "epoch": 0.2958744621614781, "grad_norm": 0.1593310683965683, "learning_rate": 1.9674829928624042e-05, "loss": 0.5467, "step": 1169 }, { "epoch": 0.296127562642369, "grad_norm": 0.15252695977687836, "learning_rate": 1.9674224040368277e-05, "loss": 0.5604, "step": 1170 }, { "epoch": 0.29638066312325995, "grad_norm": 0.14795660972595215, "learning_rate": 1.9673617597509774e-05, "loss": 0.5525, "step": 1171 }, { "epoch": 0.29663376360415084, "grad_norm": 0.14674055576324463, "learning_rate": 1.9673010600083287e-05, "loss": 0.5807, "step": 1172 }, { "epoch": 0.2968868640850418, "grad_norm": 0.1470927894115448, "learning_rate": 1.9672403048123624e-05, "loss": 0.5216, "step": 1173 }, { "epoch": 0.29713996456593267, "grad_norm": 0.15062348544597626, "learning_rate": 1.9671794941665613e-05, "loss": 0.5343, "step": 1174 }, { "epoch": 0.2973930650468236, "grad_norm": 0.16333657503128052, "learning_rate": 1.9671186280744114e-05, "loss": 0.5568, "step": 1175 }, { "epoch": 0.2976461655277145, "grad_norm": 0.1702549308538437, "learning_rate": 1.9670577065394018e-05, "loss": 0.549, "step": 1176 }, { "epoch": 0.2978992660086054, "grad_norm": 0.14794114232063293, "learning_rate": 1.9669967295650256e-05, "loss": 0.563, "step": 1177 }, { "epoch": 0.29815236648949633, "grad_norm": 0.14850711822509766, "learning_rate": 1.9669356971547778e-05, "loss": 0.5508, "step": 1178 }, { "epoch": 0.2984054669703872, "grad_norm": 0.1458379328250885, "learning_rate": 1.966874609312158e-05, "loss": 0.5793, "step": 1179 }, { "epoch": 0.29865856745127817, "grad_norm": 0.18383198976516724, "learning_rate": 1.9668134660406675e-05, "loss": 0.5607, "step": 1180 }, { "epoch": 0.29891166793216906, "grad_norm": 0.14815421402454376, "learning_rate": 1.966752267343812e-05, "loss": 0.5542, "step": 1181 }, { "epoch": 0.29916476841306, "grad_norm": 0.150907501578331, "learning_rate": 1.9666910132250995e-05, "loss": 0.5533, "step": 1182 }, { "epoch": 0.2994178688939509, "grad_norm": 0.1607259064912796, "learning_rate": 1.966629703688042e-05, "loss": 0.5646, "step": 1183 }, { "epoch": 0.29967096937484183, "grad_norm": 0.1456514447927475, "learning_rate": 1.966568338736154e-05, "loss": 0.5466, "step": 1184 }, { "epoch": 0.2999240698557327, "grad_norm": 0.14388611912727356, "learning_rate": 1.966506918372953e-05, "loss": 0.5208, "step": 1185 }, { "epoch": 0.30017717033662367, "grad_norm": 0.14943061769008636, "learning_rate": 1.9664454426019614e-05, "loss": 0.5642, "step": 1186 }, { "epoch": 0.30043027081751456, "grad_norm": 0.16205556690692902, "learning_rate": 1.966383911426702e-05, "loss": 0.5611, "step": 1187 }, { "epoch": 0.30068337129840544, "grad_norm": 0.15247951447963715, "learning_rate": 1.9663223248507034e-05, "loss": 0.5699, "step": 1188 }, { "epoch": 0.3009364717792964, "grad_norm": 0.1501820832490921, "learning_rate": 1.9662606828774956e-05, "loss": 0.574, "step": 1189 }, { "epoch": 0.3011895722601873, "grad_norm": 0.15078707039356232, "learning_rate": 1.9661989855106122e-05, "loss": 0.5715, "step": 1190 }, { "epoch": 0.3014426727410782, "grad_norm": 0.15016232430934906, "learning_rate": 1.966137232753591e-05, "loss": 0.5592, "step": 1191 }, { "epoch": 0.3016957732219691, "grad_norm": 0.1505821794271469, "learning_rate": 1.9660754246099715e-05, "loss": 0.5865, "step": 1192 }, { "epoch": 0.30194887370286005, "grad_norm": 0.2461215853691101, "learning_rate": 1.9660135610832966e-05, "loss": 0.5559, "step": 1193 }, { "epoch": 0.30220197418375094, "grad_norm": 0.15415988862514496, "learning_rate": 1.965951642177114e-05, "loss": 0.5582, "step": 1194 }, { "epoch": 0.3024550746646419, "grad_norm": 0.14573028683662415, "learning_rate": 1.9658896678949726e-05, "loss": 0.5366, "step": 1195 }, { "epoch": 0.3027081751455328, "grad_norm": 0.14445725083351135, "learning_rate": 1.9658276382404255e-05, "loss": 0.5591, "step": 1196 }, { "epoch": 0.30296127562642367, "grad_norm": 0.15039972960948944, "learning_rate": 1.9657655532170286e-05, "loss": 0.5732, "step": 1197 }, { "epoch": 0.3032143761073146, "grad_norm": 0.14960485696792603, "learning_rate": 1.965703412828341e-05, "loss": 0.5558, "step": 1198 }, { "epoch": 0.3034674765882055, "grad_norm": 0.14577481150627136, "learning_rate": 1.9656412170779254e-05, "loss": 0.5279, "step": 1199 }, { "epoch": 0.30372057706909644, "grad_norm": 0.14354942739009857, "learning_rate": 1.9655789659693473e-05, "loss": 0.5384, "step": 1200 }, { "epoch": 0.30397367754998733, "grad_norm": 0.1729145646095276, "learning_rate": 1.965516659506175e-05, "loss": 0.5649, "step": 1201 }, { "epoch": 0.3042267780308783, "grad_norm": 0.14442485570907593, "learning_rate": 1.965454297691981e-05, "loss": 0.5646, "step": 1202 }, { "epoch": 0.30447987851176916, "grad_norm": 0.14704465866088867, "learning_rate": 1.96539188053034e-05, "loss": 0.5459, "step": 1203 }, { "epoch": 0.3047329789926601, "grad_norm": 0.1534062772989273, "learning_rate": 1.9653294080248302e-05, "loss": 0.5841, "step": 1204 }, { "epoch": 0.304986079473551, "grad_norm": 0.15259750187397003, "learning_rate": 1.9652668801790334e-05, "loss": 0.5793, "step": 1205 }, { "epoch": 0.3052391799544419, "grad_norm": 0.1467946618795395, "learning_rate": 1.965204296996534e-05, "loss": 0.548, "step": 1206 }, { "epoch": 0.30549228043533283, "grad_norm": 0.15427266061306, "learning_rate": 1.9651416584809192e-05, "loss": 0.5533, "step": 1207 }, { "epoch": 0.3057453809162237, "grad_norm": 0.15036946535110474, "learning_rate": 1.9650789646357803e-05, "loss": 0.5539, "step": 1208 }, { "epoch": 0.30599848139711466, "grad_norm": 0.14675118029117584, "learning_rate": 1.965016215464712e-05, "loss": 0.5656, "step": 1209 }, { "epoch": 0.30625158187800555, "grad_norm": 0.1452735811471939, "learning_rate": 1.964953410971311e-05, "loss": 0.5528, "step": 1210 }, { "epoch": 0.3065046823588965, "grad_norm": 0.17103785276412964, "learning_rate": 1.964890551159178e-05, "loss": 0.5444, "step": 1211 }, { "epoch": 0.3067577828397874, "grad_norm": 0.1458415538072586, "learning_rate": 1.9648276360319163e-05, "loss": 0.5467, "step": 1212 }, { "epoch": 0.30701088332067833, "grad_norm": 0.14854510128498077, "learning_rate": 1.9647646655931327e-05, "loss": 0.5493, "step": 1213 }, { "epoch": 0.3072639838015692, "grad_norm": 0.14343342185020447, "learning_rate": 1.9647016398464377e-05, "loss": 0.5302, "step": 1214 }, { "epoch": 0.30751708428246016, "grad_norm": 0.17061619460582733, "learning_rate": 1.9646385587954437e-05, "loss": 0.5539, "step": 1215 }, { "epoch": 0.30777018476335105, "grad_norm": 0.14777931571006775, "learning_rate": 1.9645754224437675e-05, "loss": 0.5513, "step": 1216 }, { "epoch": 0.30802328524424194, "grad_norm": 0.14115074276924133, "learning_rate": 1.9645122307950283e-05, "loss": 0.5096, "step": 1217 }, { "epoch": 0.3082763857251329, "grad_norm": 0.14359930157661438, "learning_rate": 1.964448983852849e-05, "loss": 0.5517, "step": 1218 }, { "epoch": 0.3085294862060238, "grad_norm": 0.15274740755558014, "learning_rate": 1.9643856816208554e-05, "loss": 0.5539, "step": 1219 }, { "epoch": 0.3087825866869147, "grad_norm": 0.1473120003938675, "learning_rate": 1.964322324102676e-05, "loss": 0.5475, "step": 1220 }, { "epoch": 0.3090356871678056, "grad_norm": 0.14768411219120026, "learning_rate": 1.964258911301944e-05, "loss": 0.5569, "step": 1221 }, { "epoch": 0.30928878764869655, "grad_norm": 0.14215229451656342, "learning_rate": 1.9641954432222932e-05, "loss": 0.542, "step": 1222 }, { "epoch": 0.30954188812958744, "grad_norm": 0.15531481802463531, "learning_rate": 1.9641319198673634e-05, "loss": 0.5389, "step": 1223 }, { "epoch": 0.3097949886104784, "grad_norm": 0.14298652112483978, "learning_rate": 1.964068341240796e-05, "loss": 0.5622, "step": 1224 }, { "epoch": 0.31004808909136927, "grad_norm": 0.1486559808254242, "learning_rate": 1.964004707346235e-05, "loss": 0.5679, "step": 1225 }, { "epoch": 0.31030118957226016, "grad_norm": 0.14799034595489502, "learning_rate": 1.9639410181873296e-05, "loss": 0.5861, "step": 1226 }, { "epoch": 0.3105542900531511, "grad_norm": 0.15091361105442047, "learning_rate": 1.96387727376773e-05, "loss": 0.5825, "step": 1227 }, { "epoch": 0.310807390534042, "grad_norm": 0.1536412090063095, "learning_rate": 1.9638134740910914e-05, "loss": 0.5909, "step": 1228 }, { "epoch": 0.31106049101493294, "grad_norm": 0.15981240570545197, "learning_rate": 1.9637496191610703e-05, "loss": 0.5426, "step": 1229 }, { "epoch": 0.3113135914958238, "grad_norm": 0.15578638017177582, "learning_rate": 1.963685708981328e-05, "loss": 0.5561, "step": 1230 }, { "epoch": 0.31156669197671477, "grad_norm": 0.14452330768108368, "learning_rate": 1.9636217435555282e-05, "loss": 0.5751, "step": 1231 }, { "epoch": 0.31181979245760566, "grad_norm": 0.1484268307685852, "learning_rate": 1.963557722887338e-05, "loss": 0.5433, "step": 1232 }, { "epoch": 0.3120728929384966, "grad_norm": 0.14375555515289307, "learning_rate": 1.963493646980428e-05, "loss": 0.566, "step": 1233 }, { "epoch": 0.3123259934193875, "grad_norm": 0.14822691679000854, "learning_rate": 1.96342951583847e-05, "loss": 0.5551, "step": 1234 }, { "epoch": 0.31257909390027844, "grad_norm": 0.14368198812007904, "learning_rate": 1.963365329465142e-05, "loss": 0.5509, "step": 1235 }, { "epoch": 0.3128321943811693, "grad_norm": 0.1450127214193344, "learning_rate": 1.9633010878641236e-05, "loss": 0.5796, "step": 1236 }, { "epoch": 0.3130852948620602, "grad_norm": 0.14795133471488953, "learning_rate": 1.963236791039097e-05, "loss": 0.5687, "step": 1237 }, { "epoch": 0.31333839534295116, "grad_norm": 0.14844830334186554, "learning_rate": 1.9631724389937478e-05, "loss": 0.5522, "step": 1238 }, { "epoch": 0.31359149582384205, "grad_norm": 0.14689283072948456, "learning_rate": 1.9631080317317662e-05, "loss": 0.559, "step": 1239 }, { "epoch": 0.313844596304733, "grad_norm": 0.14795449376106262, "learning_rate": 1.9630435692568443e-05, "loss": 0.5182, "step": 1240 }, { "epoch": 0.3140976967856239, "grad_norm": 0.14915379881858826, "learning_rate": 1.9629790515726773e-05, "loss": 0.5407, "step": 1241 }, { "epoch": 0.3143507972665148, "grad_norm": 0.15982119739055634, "learning_rate": 1.9629144786829642e-05, "loss": 0.5515, "step": 1242 }, { "epoch": 0.3146038977474057, "grad_norm": 0.1536249965429306, "learning_rate": 1.9628498505914065e-05, "loss": 0.557, "step": 1243 }, { "epoch": 0.31485699822829666, "grad_norm": 0.15894600749015808, "learning_rate": 1.962785167301709e-05, "loss": 0.5411, "step": 1244 }, { "epoch": 0.31511009870918755, "grad_norm": 0.30941352248191833, "learning_rate": 1.9627204288175806e-05, "loss": 0.5907, "step": 1245 }, { "epoch": 0.31536319919007844, "grad_norm": 0.15000180900096893, "learning_rate": 1.9626556351427318e-05, "loss": 0.5546, "step": 1246 }, { "epoch": 0.3156162996709694, "grad_norm": 0.1520778387784958, "learning_rate": 1.9625907862808777e-05, "loss": 0.5743, "step": 1247 }, { "epoch": 0.31586940015186027, "grad_norm": 0.14461271464824677, "learning_rate": 1.9625258822357355e-05, "loss": 0.5321, "step": 1248 }, { "epoch": 0.3161225006327512, "grad_norm": 0.15170255303382874, "learning_rate": 1.9624609230110266e-05, "loss": 0.5558, "step": 1249 }, { "epoch": 0.3163756011136421, "grad_norm": 0.14372846484184265, "learning_rate": 1.9623959086104746e-05, "loss": 0.5265, "step": 1250 }, { "epoch": 0.31662870159453305, "grad_norm": 0.14450037479400635, "learning_rate": 1.9623308390378062e-05, "loss": 0.5717, "step": 1251 }, { "epoch": 0.31688180207542394, "grad_norm": 0.14532533288002014, "learning_rate": 1.9622657142967523e-05, "loss": 0.5581, "step": 1252 }, { "epoch": 0.3171349025563149, "grad_norm": 0.1428423672914505, "learning_rate": 1.9622005343910464e-05, "loss": 0.5561, "step": 1253 }, { "epoch": 0.31738800303720577, "grad_norm": 0.14719721674919128, "learning_rate": 1.9621352993244244e-05, "loss": 0.576, "step": 1254 }, { "epoch": 0.31764110351809666, "grad_norm": 0.15930767357349396, "learning_rate": 1.9620700091006274e-05, "loss": 0.5626, "step": 1255 }, { "epoch": 0.3178942039989876, "grad_norm": 0.13895705342292786, "learning_rate": 1.962004663723397e-05, "loss": 0.517, "step": 1256 }, { "epoch": 0.3181473044798785, "grad_norm": 0.1523996889591217, "learning_rate": 1.96193926319648e-05, "loss": 0.5687, "step": 1257 }, { "epoch": 0.31840040496076943, "grad_norm": 0.14647218585014343, "learning_rate": 1.9618738075236258e-05, "loss": 0.5316, "step": 1258 }, { "epoch": 0.3186535054416603, "grad_norm": 0.18226373195648193, "learning_rate": 1.961808296708586e-05, "loss": 0.5278, "step": 1259 }, { "epoch": 0.31890660592255127, "grad_norm": 0.1543813794851303, "learning_rate": 1.961742730755117e-05, "loss": 0.5644, "step": 1260 }, { "epoch": 0.31915970640344216, "grad_norm": 0.14397448301315308, "learning_rate": 1.961677109666978e-05, "loss": 0.5058, "step": 1261 }, { "epoch": 0.3194128068843331, "grad_norm": 0.14688347280025482, "learning_rate": 1.9616114334479293e-05, "loss": 0.5484, "step": 1262 }, { "epoch": 0.319665907365224, "grad_norm": 0.16274970769882202, "learning_rate": 1.9615457021017376e-05, "loss": 0.5387, "step": 1263 }, { "epoch": 0.31991900784611493, "grad_norm": 0.14345817267894745, "learning_rate": 1.96147991563217e-05, "loss": 0.5318, "step": 1264 }, { "epoch": 0.3201721083270058, "grad_norm": 0.14996305108070374, "learning_rate": 1.9614140740429987e-05, "loss": 0.5673, "step": 1265 }, { "epoch": 0.3204252088078967, "grad_norm": 0.14910423755645752, "learning_rate": 1.961348177337998e-05, "loss": 0.5521, "step": 1266 }, { "epoch": 0.32067830928878766, "grad_norm": 0.15267117321491241, "learning_rate": 1.9612822255209448e-05, "loss": 0.5619, "step": 1267 }, { "epoch": 0.32093140976967854, "grad_norm": 0.15022993087768555, "learning_rate": 1.9612162185956215e-05, "loss": 0.5441, "step": 1268 }, { "epoch": 0.3211845102505695, "grad_norm": 0.14584487676620483, "learning_rate": 1.9611501565658112e-05, "loss": 0.5511, "step": 1269 }, { "epoch": 0.3214376107314604, "grad_norm": 0.14786754548549652, "learning_rate": 1.9610840394353012e-05, "loss": 0.5311, "step": 1270 }, { "epoch": 0.3216907112123513, "grad_norm": 0.14073446393013, "learning_rate": 1.961017867207882e-05, "loss": 0.5499, "step": 1271 }, { "epoch": 0.3219438116932422, "grad_norm": 0.152338907122612, "learning_rate": 1.960951639887347e-05, "loss": 0.5565, "step": 1272 }, { "epoch": 0.32219691217413315, "grad_norm": 0.14627261459827423, "learning_rate": 1.9608853574774928e-05, "loss": 0.5581, "step": 1273 }, { "epoch": 0.32245001265502404, "grad_norm": 0.1517730951309204, "learning_rate": 1.9608190199821194e-05, "loss": 0.5723, "step": 1274 }, { "epoch": 0.32270311313591493, "grad_norm": 0.14177106320858002, "learning_rate": 1.9607526274050296e-05, "loss": 0.5347, "step": 1275 }, { "epoch": 0.3229562136168059, "grad_norm": 0.1493445485830307, "learning_rate": 1.9606861797500297e-05, "loss": 0.5429, "step": 1276 }, { "epoch": 0.32320931409769676, "grad_norm": 0.14698271453380585, "learning_rate": 1.9606196770209293e-05, "loss": 0.5202, "step": 1277 }, { "epoch": 0.3234624145785877, "grad_norm": 0.15068919956684113, "learning_rate": 1.96055311922154e-05, "loss": 0.5311, "step": 1278 }, { "epoch": 0.3237155150594786, "grad_norm": 0.16112153232097626, "learning_rate": 1.9604865063556782e-05, "loss": 0.5354, "step": 1279 }, { "epoch": 0.32396861554036954, "grad_norm": 0.14112502336502075, "learning_rate": 1.9604198384271623e-05, "loss": 0.5646, "step": 1280 }, { "epoch": 0.32422171602126043, "grad_norm": 0.16473205387592316, "learning_rate": 1.9603531154398142e-05, "loss": 0.5443, "step": 1281 }, { "epoch": 0.3244748165021514, "grad_norm": 0.1423436552286148, "learning_rate": 1.9602863373974598e-05, "loss": 0.5461, "step": 1282 }, { "epoch": 0.32472791698304226, "grad_norm": 0.17554926872253418, "learning_rate": 1.9602195043039262e-05, "loss": 0.5414, "step": 1283 }, { "epoch": 0.3249810174639332, "grad_norm": 0.1491205394268036, "learning_rate": 1.960152616163045e-05, "loss": 0.55, "step": 1284 }, { "epoch": 0.3252341179448241, "grad_norm": 0.14338554441928864, "learning_rate": 1.9600856729786515e-05, "loss": 0.5461, "step": 1285 }, { "epoch": 0.325487218425715, "grad_norm": 0.14553028345108032, "learning_rate": 1.960018674754583e-05, "loss": 0.5636, "step": 1286 }, { "epoch": 0.32574031890660593, "grad_norm": 0.15363600850105286, "learning_rate": 1.9599516214946802e-05, "loss": 0.5671, "step": 1287 }, { "epoch": 0.3259934193874968, "grad_norm": 0.14060044288635254, "learning_rate": 1.959884513202787e-05, "loss": 0.5303, "step": 1288 }, { "epoch": 0.32624651986838776, "grad_norm": 0.15078315138816833, "learning_rate": 1.959817349882751e-05, "loss": 0.5658, "step": 1289 }, { "epoch": 0.32649962034927865, "grad_norm": 0.1502080112695694, "learning_rate": 1.9597501315384223e-05, "loss": 0.5571, "step": 1290 }, { "epoch": 0.3267527208301696, "grad_norm": 0.14814816415309906, "learning_rate": 1.9596828581736545e-05, "loss": 0.5646, "step": 1291 }, { "epoch": 0.3270058213110605, "grad_norm": 0.15655817091464996, "learning_rate": 1.9596155297923037e-05, "loss": 0.5691, "step": 1292 }, { "epoch": 0.32725892179195143, "grad_norm": 0.1446794867515564, "learning_rate": 1.9595481463982308e-05, "loss": 0.5333, "step": 1293 }, { "epoch": 0.3275120222728423, "grad_norm": 0.1509925127029419, "learning_rate": 1.9594807079952978e-05, "loss": 0.55, "step": 1294 }, { "epoch": 0.3277651227537332, "grad_norm": 0.1459847241640091, "learning_rate": 1.959413214587371e-05, "loss": 0.5386, "step": 1295 }, { "epoch": 0.32801822323462415, "grad_norm": 0.1447642296552658, "learning_rate": 1.95934566617832e-05, "loss": 0.5471, "step": 1296 }, { "epoch": 0.32827132371551504, "grad_norm": 0.15009340643882751, "learning_rate": 1.9592780627720168e-05, "loss": 0.5552, "step": 1297 }, { "epoch": 0.328524424196406, "grad_norm": 0.1540219485759735, "learning_rate": 1.9592104043723372e-05, "loss": 0.5529, "step": 1298 }, { "epoch": 0.3287775246772969, "grad_norm": 0.14725595712661743, "learning_rate": 1.9591426909831595e-05, "loss": 0.5668, "step": 1299 }, { "epoch": 0.3290306251581878, "grad_norm": 0.1435309201478958, "learning_rate": 1.9590749226083664e-05, "loss": 0.5603, "step": 1300 }, { "epoch": 0.3292837256390787, "grad_norm": 0.151546910405159, "learning_rate": 1.959007099251842e-05, "loss": 0.5497, "step": 1301 }, { "epoch": 0.32953682611996965, "grad_norm": 0.1673007309436798, "learning_rate": 1.9589392209174756e-05, "loss": 0.5575, "step": 1302 }, { "epoch": 0.32978992660086054, "grad_norm": 0.1480465680360794, "learning_rate": 1.9588712876091572e-05, "loss": 0.5506, "step": 1303 }, { "epoch": 0.33004302708175143, "grad_norm": 0.15586607158184052, "learning_rate": 1.958803299330782e-05, "loss": 0.5569, "step": 1304 }, { "epoch": 0.33029612756264237, "grad_norm": 0.1441100686788559, "learning_rate": 1.9587352560862473e-05, "loss": 0.5527, "step": 1305 }, { "epoch": 0.33054922804353326, "grad_norm": 0.15386159718036652, "learning_rate": 1.9586671578794544e-05, "loss": 0.5527, "step": 1306 }, { "epoch": 0.3308023285244242, "grad_norm": 0.15203054249286652, "learning_rate": 1.958599004714307e-05, "loss": 0.5776, "step": 1307 }, { "epoch": 0.3310554290053151, "grad_norm": 0.15152958035469055, "learning_rate": 1.958530796594712e-05, "loss": 0.5666, "step": 1308 }, { "epoch": 0.33130852948620604, "grad_norm": 0.15217675268650055, "learning_rate": 1.9584625335245792e-05, "loss": 0.5522, "step": 1309 }, { "epoch": 0.3315616299670969, "grad_norm": 0.15009896457195282, "learning_rate": 1.958394215507823e-05, "loss": 0.5431, "step": 1310 }, { "epoch": 0.33181473044798787, "grad_norm": 0.14295952022075653, "learning_rate": 1.958325842548359e-05, "loss": 0.5504, "step": 1311 }, { "epoch": 0.33206783092887876, "grad_norm": 0.14832429587841034, "learning_rate": 1.9582574146501077e-05, "loss": 0.5468, "step": 1312 }, { "epoch": 0.3323209314097697, "grad_norm": 0.14665763080120087, "learning_rate": 1.9581889318169915e-05, "loss": 0.5596, "step": 1313 }, { "epoch": 0.3325740318906606, "grad_norm": 0.15014337003231049, "learning_rate": 1.9581203940529362e-05, "loss": 0.5636, "step": 1314 }, { "epoch": 0.3328271323715515, "grad_norm": 0.2629657983779907, "learning_rate": 1.9580518013618714e-05, "loss": 0.5379, "step": 1315 }, { "epoch": 0.3330802328524424, "grad_norm": 0.14648117125034332, "learning_rate": 1.9579831537477286e-05, "loss": 0.5614, "step": 1316 }, { "epoch": 0.3333333333333333, "grad_norm": 0.15964412689208984, "learning_rate": 1.9579144512144442e-05, "loss": 0.5497, "step": 1317 }, { "epoch": 0.33358643381422426, "grad_norm": 0.14608141779899597, "learning_rate": 1.957845693765956e-05, "loss": 0.5498, "step": 1318 }, { "epoch": 0.33383953429511515, "grad_norm": 0.1524888575077057, "learning_rate": 1.9577768814062058e-05, "loss": 0.5459, "step": 1319 }, { "epoch": 0.3340926347760061, "grad_norm": 0.14381134510040283, "learning_rate": 1.9577080141391393e-05, "loss": 0.5614, "step": 1320 }, { "epoch": 0.334345735256897, "grad_norm": 0.14823700487613678, "learning_rate": 1.9576390919687033e-05, "loss": 0.5611, "step": 1321 }, { "epoch": 0.3345988357377879, "grad_norm": 0.14616329967975616, "learning_rate": 1.9575701148988497e-05, "loss": 0.52, "step": 1322 }, { "epoch": 0.3348519362186788, "grad_norm": 0.1496943235397339, "learning_rate": 1.9575010829335328e-05, "loss": 0.5572, "step": 1323 }, { "epoch": 0.3351050366995697, "grad_norm": 0.1528523713350296, "learning_rate": 1.95743199607671e-05, "loss": 0.5452, "step": 1324 }, { "epoch": 0.33535813718046065, "grad_norm": 0.14889608323574066, "learning_rate": 1.9573628543323414e-05, "loss": 0.5365, "step": 1325 }, { "epoch": 0.33561123766135154, "grad_norm": 0.1547989845275879, "learning_rate": 1.9572936577043915e-05, "loss": 0.5476, "step": 1326 }, { "epoch": 0.3358643381422425, "grad_norm": 0.17157310247421265, "learning_rate": 1.9572244061968265e-05, "loss": 0.5476, "step": 1327 }, { "epoch": 0.33611743862313337, "grad_norm": 0.1616293042898178, "learning_rate": 1.9571550998136172e-05, "loss": 0.5407, "step": 1328 }, { "epoch": 0.3363705391040243, "grad_norm": 0.15019971132278442, "learning_rate": 1.9570857385587363e-05, "loss": 0.5362, "step": 1329 }, { "epoch": 0.3366236395849152, "grad_norm": 0.15603739023208618, "learning_rate": 1.9570163224361602e-05, "loss": 0.5458, "step": 1330 }, { "epoch": 0.33687674006580615, "grad_norm": 0.14277146756649017, "learning_rate": 1.9569468514498683e-05, "loss": 0.5376, "step": 1331 }, { "epoch": 0.33712984054669703, "grad_norm": 0.14798814058303833, "learning_rate": 1.9568773256038437e-05, "loss": 0.5618, "step": 1332 }, { "epoch": 0.337382941027588, "grad_norm": 0.15534618496894836, "learning_rate": 1.9568077449020714e-05, "loss": 0.5585, "step": 1333 }, { "epoch": 0.33763604150847887, "grad_norm": 0.15266895294189453, "learning_rate": 1.9567381093485407e-05, "loss": 0.5676, "step": 1334 }, { "epoch": 0.33788914198936976, "grad_norm": 0.14401869475841522, "learning_rate": 1.9566684189472437e-05, "loss": 0.5631, "step": 1335 }, { "epoch": 0.3381422424702607, "grad_norm": 0.14957746863365173, "learning_rate": 1.9565986737021755e-05, "loss": 0.5474, "step": 1336 }, { "epoch": 0.3383953429511516, "grad_norm": 0.1466982066631317, "learning_rate": 1.9565288736173347e-05, "loss": 0.5371, "step": 1337 }, { "epoch": 0.33864844343204253, "grad_norm": 0.153359055519104, "learning_rate": 1.9564590186967224e-05, "loss": 0.5191, "step": 1338 }, { "epoch": 0.3389015439129334, "grad_norm": 0.1684064418077469, "learning_rate": 1.9563891089443436e-05, "loss": 0.5869, "step": 1339 }, { "epoch": 0.33915464439382437, "grad_norm": 0.14754392206668854, "learning_rate": 1.956319144364206e-05, "loss": 0.5393, "step": 1340 }, { "epoch": 0.33940774487471526, "grad_norm": 0.14714030921459198, "learning_rate": 1.9562491249603205e-05, "loss": 0.5592, "step": 1341 }, { "epoch": 0.3396608453556062, "grad_norm": 0.14940780401229858, "learning_rate": 1.956179050736701e-05, "loss": 0.5767, "step": 1342 }, { "epoch": 0.3399139458364971, "grad_norm": 0.1524302363395691, "learning_rate": 1.9561089216973644e-05, "loss": 0.5508, "step": 1343 }, { "epoch": 0.340167046317388, "grad_norm": 0.1543436348438263, "learning_rate": 1.956038737846332e-05, "loss": 0.5439, "step": 1344 }, { "epoch": 0.3404201467982789, "grad_norm": 0.1465919464826584, "learning_rate": 1.9559684991876264e-05, "loss": 0.5602, "step": 1345 }, { "epoch": 0.3406732472791698, "grad_norm": 0.1619967222213745, "learning_rate": 1.9558982057252747e-05, "loss": 0.5605, "step": 1346 }, { "epoch": 0.34092634776006076, "grad_norm": 0.1516886055469513, "learning_rate": 1.9558278574633066e-05, "loss": 0.5531, "step": 1347 }, { "epoch": 0.34117944824095164, "grad_norm": 0.13994504511356354, "learning_rate": 1.9557574544057552e-05, "loss": 0.5326, "step": 1348 }, { "epoch": 0.3414325487218426, "grad_norm": 0.145668625831604, "learning_rate": 1.955686996556656e-05, "loss": 0.5703, "step": 1349 }, { "epoch": 0.3416856492027335, "grad_norm": 0.14654158055782318, "learning_rate": 1.9556164839200487e-05, "loss": 0.5418, "step": 1350 }, { "epoch": 0.3419387496836244, "grad_norm": 0.14354093372821808, "learning_rate": 1.9555459164999752e-05, "loss": 0.5769, "step": 1351 }, { "epoch": 0.3421918501645153, "grad_norm": 0.1480148732662201, "learning_rate": 1.9554752943004816e-05, "loss": 0.5573, "step": 1352 }, { "epoch": 0.3424449506454062, "grad_norm": 0.16286349296569824, "learning_rate": 1.955404617325616e-05, "loss": 0.544, "step": 1353 }, { "epoch": 0.34269805112629714, "grad_norm": 0.15159283578395844, "learning_rate": 1.9553338855794302e-05, "loss": 0.5495, "step": 1354 }, { "epoch": 0.34295115160718803, "grad_norm": 0.14285486936569214, "learning_rate": 1.9552630990659796e-05, "loss": 0.5738, "step": 1355 }, { "epoch": 0.343204252088079, "grad_norm": 0.14942722022533417, "learning_rate": 1.9551922577893214e-05, "loss": 0.586, "step": 1356 }, { "epoch": 0.34345735256896986, "grad_norm": 0.1516982465982437, "learning_rate": 1.9551213617535176e-05, "loss": 0.5626, "step": 1357 }, { "epoch": 0.3437104530498608, "grad_norm": 0.15070468187332153, "learning_rate": 1.9550504109626324e-05, "loss": 0.5479, "step": 1358 }, { "epoch": 0.3439635535307517, "grad_norm": 0.1549229472875595, "learning_rate": 1.9549794054207324e-05, "loss": 0.547, "step": 1359 }, { "epoch": 0.34421665401164264, "grad_norm": 0.1502760797739029, "learning_rate": 1.9549083451318893e-05, "loss": 0.542, "step": 1360 }, { "epoch": 0.34446975449253353, "grad_norm": 0.14848487079143524, "learning_rate": 1.9548372301001764e-05, "loss": 0.5609, "step": 1361 }, { "epoch": 0.3447228549734245, "grad_norm": 0.1615775227546692, "learning_rate": 1.9547660603296702e-05, "loss": 0.5792, "step": 1362 }, { "epoch": 0.34497595545431536, "grad_norm": 0.15550091862678528, "learning_rate": 1.9546948358244513e-05, "loss": 0.5413, "step": 1363 }, { "epoch": 0.34522905593520625, "grad_norm": 0.14651690423488617, "learning_rate": 1.9546235565886024e-05, "loss": 0.5402, "step": 1364 }, { "epoch": 0.3454821564160972, "grad_norm": 0.1453961730003357, "learning_rate": 1.9545522226262102e-05, "loss": 0.5549, "step": 1365 }, { "epoch": 0.3457352568969881, "grad_norm": 0.14784598350524902, "learning_rate": 1.954480833941364e-05, "loss": 0.5399, "step": 1366 }, { "epoch": 0.34598835737787903, "grad_norm": 0.16435948014259338, "learning_rate": 1.954409390538156e-05, "loss": 0.5305, "step": 1367 }, { "epoch": 0.3462414578587699, "grad_norm": 0.1500237137079239, "learning_rate": 1.954337892420682e-05, "loss": 0.533, "step": 1368 }, { "epoch": 0.34649455833966086, "grad_norm": 0.14695794880390167, "learning_rate": 1.9542663395930414e-05, "loss": 0.5699, "step": 1369 }, { "epoch": 0.34674765882055175, "grad_norm": 0.1687469333410263, "learning_rate": 1.9541947320593356e-05, "loss": 0.5452, "step": 1370 }, { "epoch": 0.3470007593014427, "grad_norm": 0.14847731590270996, "learning_rate": 1.9541230698236703e-05, "loss": 0.5434, "step": 1371 }, { "epoch": 0.3472538597823336, "grad_norm": 0.14137008786201477, "learning_rate": 1.954051352890153e-05, "loss": 0.5174, "step": 1372 }, { "epoch": 0.3475069602632245, "grad_norm": 0.1569334715604782, "learning_rate": 1.953979581262895e-05, "loss": 0.5393, "step": 1373 }, { "epoch": 0.3477600607441154, "grad_norm": 0.14948101341724396, "learning_rate": 1.953907754946012e-05, "loss": 0.5647, "step": 1374 }, { "epoch": 0.3480131612250063, "grad_norm": 0.1450328826904297, "learning_rate": 1.9538358739436206e-05, "loss": 0.57, "step": 1375 }, { "epoch": 0.34826626170589725, "grad_norm": 0.14773407578468323, "learning_rate": 1.9537639382598417e-05, "loss": 0.553, "step": 1376 }, { "epoch": 0.34851936218678814, "grad_norm": 0.18741102516651154, "learning_rate": 1.9536919478987995e-05, "loss": 0.5694, "step": 1377 }, { "epoch": 0.3487724626676791, "grad_norm": 0.14810268580913544, "learning_rate": 1.953619902864621e-05, "loss": 0.5612, "step": 1378 }, { "epoch": 0.34902556314857, "grad_norm": 0.1470220386981964, "learning_rate": 1.9535478031614362e-05, "loss": 0.5428, "step": 1379 }, { "epoch": 0.3492786636294609, "grad_norm": 0.15955446660518646, "learning_rate": 1.9534756487933784e-05, "loss": 0.5645, "step": 1380 }, { "epoch": 0.3495317641103518, "grad_norm": 0.15951718389987946, "learning_rate": 1.9534034397645844e-05, "loss": 0.5398, "step": 1381 }, { "epoch": 0.34978486459124275, "grad_norm": 0.15523962676525116, "learning_rate": 1.9533311760791937e-05, "loss": 0.5445, "step": 1382 }, { "epoch": 0.35003796507213364, "grad_norm": 0.14976316690444946, "learning_rate": 1.9532588577413487e-05, "loss": 0.5801, "step": 1383 }, { "epoch": 0.3502910655530245, "grad_norm": 0.15287049114704132, "learning_rate": 1.9531864847551958e-05, "loss": 0.5723, "step": 1384 }, { "epoch": 0.35054416603391547, "grad_norm": 0.14190274477005005, "learning_rate": 1.9531140571248835e-05, "loss": 0.547, "step": 1385 }, { "epoch": 0.35079726651480636, "grad_norm": 0.14908644556999207, "learning_rate": 1.9530415748545638e-05, "loss": 0.5525, "step": 1386 }, { "epoch": 0.3510503669956973, "grad_norm": 0.1516028642654419, "learning_rate": 1.9529690379483926e-05, "loss": 0.5697, "step": 1387 }, { "epoch": 0.3513034674765882, "grad_norm": 0.14582683145999908, "learning_rate": 1.9528964464105276e-05, "loss": 0.5341, "step": 1388 }, { "epoch": 0.35155656795747914, "grad_norm": 0.15602953732013702, "learning_rate": 1.952823800245131e-05, "loss": 0.5573, "step": 1389 }, { "epoch": 0.35180966843837, "grad_norm": 0.1433050036430359, "learning_rate": 1.952751099456367e-05, "loss": 0.5232, "step": 1390 }, { "epoch": 0.35206276891926097, "grad_norm": 0.156890407204628, "learning_rate": 1.952678344048404e-05, "loss": 0.5444, "step": 1391 }, { "epoch": 0.35231586940015186, "grad_norm": 0.15238720178604126, "learning_rate": 1.9526055340254117e-05, "loss": 0.5577, "step": 1392 }, { "epoch": 0.35256896988104275, "grad_norm": 0.14713051915168762, "learning_rate": 1.952532669391565e-05, "loss": 0.5548, "step": 1393 }, { "epoch": 0.3528220703619337, "grad_norm": 0.14882564544677734, "learning_rate": 1.9524597501510408e-05, "loss": 0.5599, "step": 1394 }, { "epoch": 0.3530751708428246, "grad_norm": 0.15113122761249542, "learning_rate": 1.95238677630802e-05, "loss": 0.5508, "step": 1395 }, { "epoch": 0.3533282713237155, "grad_norm": 0.14699649810791016, "learning_rate": 1.952313747866685e-05, "loss": 0.5297, "step": 1396 }, { "epoch": 0.3535813718046064, "grad_norm": 0.14875228703022003, "learning_rate": 1.9522406648312232e-05, "loss": 0.5425, "step": 1397 }, { "epoch": 0.35383447228549736, "grad_norm": 0.1482452005147934, "learning_rate": 1.952167527205824e-05, "loss": 0.5393, "step": 1398 }, { "epoch": 0.35408757276638825, "grad_norm": 0.1498897224664688, "learning_rate": 1.95209433499468e-05, "loss": 0.5603, "step": 1399 }, { "epoch": 0.3543406732472792, "grad_norm": 0.15813641250133514, "learning_rate": 1.9520210882019878e-05, "loss": 0.5328, "step": 1400 }, { "epoch": 0.3545937737281701, "grad_norm": 0.15140895545482635, "learning_rate": 1.9519477868319457e-05, "loss": 0.5577, "step": 1401 }, { "epoch": 0.35484687420906097, "grad_norm": 0.15345318615436554, "learning_rate": 1.9518744308887566e-05, "loss": 0.5369, "step": 1402 }, { "epoch": 0.3550999746899519, "grad_norm": 0.16097603738307953, "learning_rate": 1.9518010203766256e-05, "loss": 0.5621, "step": 1403 }, { "epoch": 0.3553530751708428, "grad_norm": 0.152102530002594, "learning_rate": 1.9517275552997605e-05, "loss": 0.5524, "step": 1404 }, { "epoch": 0.35560617565173375, "grad_norm": 0.13988344371318817, "learning_rate": 1.9516540356623742e-05, "loss": 0.547, "step": 1405 }, { "epoch": 0.35585927613262464, "grad_norm": 0.1525922417640686, "learning_rate": 1.9515804614686804e-05, "loss": 0.5702, "step": 1406 }, { "epoch": 0.3561123766135156, "grad_norm": 0.14725516736507416, "learning_rate": 1.951506832722897e-05, "loss": 0.5532, "step": 1407 }, { "epoch": 0.35636547709440647, "grad_norm": 0.14990653097629547, "learning_rate": 1.9514331494292458e-05, "loss": 0.5763, "step": 1408 }, { "epoch": 0.3566185775752974, "grad_norm": 0.16074863076210022, "learning_rate": 1.95135941159195e-05, "loss": 0.5419, "step": 1409 }, { "epoch": 0.3568716780561883, "grad_norm": 0.1455734223127365, "learning_rate": 1.9512856192152376e-05, "loss": 0.5462, "step": 1410 }, { "epoch": 0.35712477853707925, "grad_norm": 0.1445416659116745, "learning_rate": 1.951211772303338e-05, "loss": 0.5269, "step": 1411 }, { "epoch": 0.35737787901797013, "grad_norm": 0.14358116686344147, "learning_rate": 1.9511378708604857e-05, "loss": 0.5465, "step": 1412 }, { "epoch": 0.357630979498861, "grad_norm": 0.15081366896629333, "learning_rate": 1.951063914890917e-05, "loss": 0.5418, "step": 1413 }, { "epoch": 0.35788407997975197, "grad_norm": 0.14173376560211182, "learning_rate": 1.950989904398871e-05, "loss": 0.5512, "step": 1414 }, { "epoch": 0.35813718046064286, "grad_norm": 0.14065559208393097, "learning_rate": 1.9509158393885914e-05, "loss": 0.5422, "step": 1415 }, { "epoch": 0.3583902809415338, "grad_norm": 0.14772234857082367, "learning_rate": 1.9508417198643234e-05, "loss": 0.5548, "step": 1416 }, { "epoch": 0.3586433814224247, "grad_norm": 0.14474010467529297, "learning_rate": 1.950767545830317e-05, "loss": 0.553, "step": 1417 }, { "epoch": 0.35889648190331563, "grad_norm": 0.1493399441242218, "learning_rate": 1.950693317290824e-05, "loss": 0.5676, "step": 1418 }, { "epoch": 0.3591495823842065, "grad_norm": 0.15082105994224548, "learning_rate": 1.9506190342500997e-05, "loss": 0.5533, "step": 1419 }, { "epoch": 0.35940268286509747, "grad_norm": 0.1458379477262497, "learning_rate": 1.9505446967124025e-05, "loss": 0.5529, "step": 1420 }, { "epoch": 0.35965578334598836, "grad_norm": 0.15100237727165222, "learning_rate": 1.9504703046819944e-05, "loss": 0.5715, "step": 1421 }, { "epoch": 0.35990888382687924, "grad_norm": 0.14533960819244385, "learning_rate": 1.9503958581631396e-05, "loss": 0.5622, "step": 1422 }, { "epoch": 0.3601619843077702, "grad_norm": 0.17402461171150208, "learning_rate": 1.9503213571601067e-05, "loss": 0.5827, "step": 1423 }, { "epoch": 0.3604150847886611, "grad_norm": 0.1494351178407669, "learning_rate": 1.950246801677166e-05, "loss": 0.5436, "step": 1424 }, { "epoch": 0.360668185269552, "grad_norm": 0.14508825540542603, "learning_rate": 1.950172191718592e-05, "loss": 0.5235, "step": 1425 }, { "epoch": 0.3609212857504429, "grad_norm": 0.14634418487548828, "learning_rate": 1.9500975272886616e-05, "loss": 0.5168, "step": 1426 }, { "epoch": 0.36117438623133385, "grad_norm": 0.14786958694458008, "learning_rate": 1.9500228083916554e-05, "loss": 0.5493, "step": 1427 }, { "epoch": 0.36142748671222474, "grad_norm": 0.1488710641860962, "learning_rate": 1.949948035031857e-05, "loss": 0.5677, "step": 1428 }, { "epoch": 0.3616805871931157, "grad_norm": 0.14901059865951538, "learning_rate": 1.9498732072135526e-05, "loss": 0.5349, "step": 1429 }, { "epoch": 0.3619336876740066, "grad_norm": 0.1447782665491104, "learning_rate": 1.9497983249410324e-05, "loss": 0.5348, "step": 1430 }, { "epoch": 0.3621867881548975, "grad_norm": 0.14676731824874878, "learning_rate": 1.9497233882185886e-05, "loss": 0.5376, "step": 1431 }, { "epoch": 0.3624398886357884, "grad_norm": 0.1531912237405777, "learning_rate": 1.949648397050518e-05, "loss": 0.5415, "step": 1432 }, { "epoch": 0.3626929891166793, "grad_norm": 0.18537327647209167, "learning_rate": 1.9495733514411187e-05, "loss": 0.5479, "step": 1433 }, { "epoch": 0.36294608959757024, "grad_norm": 0.1582462340593338, "learning_rate": 1.9494982513946937e-05, "loss": 0.5685, "step": 1434 }, { "epoch": 0.36319919007846113, "grad_norm": 0.15838588774204254, "learning_rate": 1.9494230969155484e-05, "loss": 0.5342, "step": 1435 }, { "epoch": 0.3634522905593521, "grad_norm": 0.14424557983875275, "learning_rate": 1.9493478880079903e-05, "loss": 0.5342, "step": 1436 }, { "epoch": 0.36370539104024296, "grad_norm": 0.14751091599464417, "learning_rate": 1.9492726246763322e-05, "loss": 0.5417, "step": 1437 }, { "epoch": 0.3639584915211339, "grad_norm": 0.22001822292804718, "learning_rate": 1.949197306924888e-05, "loss": 0.5519, "step": 1438 }, { "epoch": 0.3642115920020248, "grad_norm": 0.14862795174121857, "learning_rate": 1.9491219347579752e-05, "loss": 0.5499, "step": 1439 }, { "epoch": 0.36446469248291574, "grad_norm": 0.14559324085712433, "learning_rate": 1.9490465081799158e-05, "loss": 0.541, "step": 1440 }, { "epoch": 0.36471779296380663, "grad_norm": 0.14515161514282227, "learning_rate": 1.9489710271950327e-05, "loss": 0.5354, "step": 1441 }, { "epoch": 0.3649708934446975, "grad_norm": 0.15026681125164032, "learning_rate": 1.9488954918076538e-05, "loss": 0.5751, "step": 1442 }, { "epoch": 0.36522399392558846, "grad_norm": 0.1478470116853714, "learning_rate": 1.9488199020221094e-05, "loss": 0.5488, "step": 1443 }, { "epoch": 0.36547709440647935, "grad_norm": 0.1495615541934967, "learning_rate": 1.9487442578427328e-05, "loss": 0.5296, "step": 1444 }, { "epoch": 0.3657301948873703, "grad_norm": 0.14521771669387817, "learning_rate": 1.94866855927386e-05, "loss": 0.548, "step": 1445 }, { "epoch": 0.3659832953682612, "grad_norm": 0.14744411408901215, "learning_rate": 1.9485928063198313e-05, "loss": 0.5557, "step": 1446 }, { "epoch": 0.36623639584915213, "grad_norm": 0.14081555604934692, "learning_rate": 1.948516998984989e-05, "loss": 0.5326, "step": 1447 }, { "epoch": 0.366489496330043, "grad_norm": 0.14889878034591675, "learning_rate": 1.9484411372736797e-05, "loss": 0.5661, "step": 1448 }, { "epoch": 0.36674259681093396, "grad_norm": 0.1612987369298935, "learning_rate": 1.948365221190251e-05, "loss": 0.5478, "step": 1449 }, { "epoch": 0.36699569729182485, "grad_norm": 0.15219271183013916, "learning_rate": 1.9482892507390568e-05, "loss": 0.5557, "step": 1450 }, { "epoch": 0.36724879777271574, "grad_norm": 0.15758199989795685, "learning_rate": 1.948213225924451e-05, "loss": 0.6024, "step": 1451 }, { "epoch": 0.3675018982536067, "grad_norm": 0.14763115346431732, "learning_rate": 1.9481371467507923e-05, "loss": 0.5329, "step": 1452 }, { "epoch": 0.3677549987344976, "grad_norm": 0.14281617105007172, "learning_rate": 1.948061013222442e-05, "loss": 0.5341, "step": 1453 }, { "epoch": 0.3680080992153885, "grad_norm": 0.14848028123378754, "learning_rate": 1.9479848253437652e-05, "loss": 0.5416, "step": 1454 }, { "epoch": 0.3682611996962794, "grad_norm": 0.1465156376361847, "learning_rate": 1.947908583119129e-05, "loss": 0.5307, "step": 1455 }, { "epoch": 0.36851430017717035, "grad_norm": 0.14618930220603943, "learning_rate": 1.947832286552905e-05, "loss": 0.5412, "step": 1456 }, { "epoch": 0.36876740065806124, "grad_norm": 0.15129581093788147, "learning_rate": 1.9477559356494662e-05, "loss": 0.5447, "step": 1457 }, { "epoch": 0.3690205011389522, "grad_norm": 0.1473216861486435, "learning_rate": 1.94767953041319e-05, "loss": 0.5243, "step": 1458 }, { "epoch": 0.3692736016198431, "grad_norm": 0.14416159689426422, "learning_rate": 1.9476030708484568e-05, "loss": 0.5422, "step": 1459 }, { "epoch": 0.369526702100734, "grad_norm": 0.14985951781272888, "learning_rate": 1.9475265569596495e-05, "loss": 0.5885, "step": 1460 }, { "epoch": 0.3697798025816249, "grad_norm": 0.15145282447338104, "learning_rate": 1.9474499887511546e-05, "loss": 0.5507, "step": 1461 }, { "epoch": 0.3700329030625158, "grad_norm": 0.1508771777153015, "learning_rate": 1.9473733662273618e-05, "loss": 0.5431, "step": 1462 }, { "epoch": 0.37028600354340674, "grad_norm": 0.15162132680416107, "learning_rate": 1.947296689392663e-05, "loss": 0.5388, "step": 1463 }, { "epoch": 0.3705391040242976, "grad_norm": 0.14948506653308868, "learning_rate": 1.947219958251455e-05, "loss": 0.5429, "step": 1464 }, { "epoch": 0.37079220450518857, "grad_norm": 0.1548861265182495, "learning_rate": 1.947143172808136e-05, "loss": 0.5538, "step": 1465 }, { "epoch": 0.37104530498607946, "grad_norm": 0.1493985801935196, "learning_rate": 1.9470663330671077e-05, "loss": 0.5254, "step": 1466 }, { "epoch": 0.3712984054669704, "grad_norm": 0.1530543714761734, "learning_rate": 1.946989439032776e-05, "loss": 0.5355, "step": 1467 }, { "epoch": 0.3715515059478613, "grad_norm": 0.1417504996061325, "learning_rate": 1.9469124907095483e-05, "loss": 0.5415, "step": 1468 }, { "epoch": 0.37180460642875224, "grad_norm": 0.14839012920856476, "learning_rate": 1.946835488101836e-05, "loss": 0.5535, "step": 1469 }, { "epoch": 0.3720577069096431, "grad_norm": 0.1545441746711731, "learning_rate": 1.9467584312140538e-05, "loss": 0.5375, "step": 1470 }, { "epoch": 0.372310807390534, "grad_norm": 0.1494404524564743, "learning_rate": 1.946681320050619e-05, "loss": 0.5845, "step": 1471 }, { "epoch": 0.37256390787142496, "grad_norm": 0.14412353932857513, "learning_rate": 1.946604154615952e-05, "loss": 0.5404, "step": 1472 }, { "epoch": 0.37281700835231585, "grad_norm": 0.14045435190200806, "learning_rate": 1.9465269349144772e-05, "loss": 0.5584, "step": 1473 }, { "epoch": 0.3730701088332068, "grad_norm": 0.14929087460041046, "learning_rate": 1.946449660950621e-05, "loss": 0.5725, "step": 1474 }, { "epoch": 0.3733232093140977, "grad_norm": 0.14448022842407227, "learning_rate": 1.9463723327288134e-05, "loss": 0.5482, "step": 1475 }, { "epoch": 0.3735763097949886, "grad_norm": 0.145907461643219, "learning_rate": 1.946294950253487e-05, "loss": 0.5593, "step": 1476 }, { "epoch": 0.3738294102758795, "grad_norm": 0.15283805131912231, "learning_rate": 1.946217513529079e-05, "loss": 0.5719, "step": 1477 }, { "epoch": 0.37408251075677046, "grad_norm": 0.22472895681858063, "learning_rate": 1.9461400225600276e-05, "loss": 0.5779, "step": 1478 }, { "epoch": 0.37433561123766135, "grad_norm": 0.14655548334121704, "learning_rate": 1.946062477350776e-05, "loss": 0.5612, "step": 1479 }, { "epoch": 0.3745887117185523, "grad_norm": 0.14366453886032104, "learning_rate": 1.9459848779057694e-05, "loss": 0.5442, "step": 1480 }, { "epoch": 0.3748418121994432, "grad_norm": 0.148257315158844, "learning_rate": 1.9459072242294566e-05, "loss": 0.544, "step": 1481 }, { "epoch": 0.37509491268033407, "grad_norm": 0.16711963713169098, "learning_rate": 1.945829516326289e-05, "loss": 0.5755, "step": 1482 }, { "epoch": 0.375348013161225, "grad_norm": 0.1500495970249176, "learning_rate": 1.9457517542007212e-05, "loss": 0.5389, "step": 1483 }, { "epoch": 0.3756011136421159, "grad_norm": 0.14473144710063934, "learning_rate": 1.945673937857212e-05, "loss": 0.5487, "step": 1484 }, { "epoch": 0.37585421412300685, "grad_norm": 0.15230558812618256, "learning_rate": 1.9455960673002214e-05, "loss": 0.5679, "step": 1485 }, { "epoch": 0.37610731460389774, "grad_norm": 0.14546474814414978, "learning_rate": 1.9455181425342146e-05, "loss": 0.5279, "step": 1486 }, { "epoch": 0.3763604150847887, "grad_norm": 0.14962340891361237, "learning_rate": 1.945440163563658e-05, "loss": 0.547, "step": 1487 }, { "epoch": 0.37661351556567957, "grad_norm": 0.1480061560869217, "learning_rate": 1.9453621303930225e-05, "loss": 0.5455, "step": 1488 }, { "epoch": 0.3768666160465705, "grad_norm": 0.16731922328472137, "learning_rate": 1.9452840430267815e-05, "loss": 0.5802, "step": 1489 }, { "epoch": 0.3771197165274614, "grad_norm": 0.15016759932041168, "learning_rate": 1.9452059014694115e-05, "loss": 0.5399, "step": 1490 }, { "epoch": 0.3773728170083523, "grad_norm": 0.14352989196777344, "learning_rate": 1.945127705725392e-05, "loss": 0.5498, "step": 1491 }, { "epoch": 0.37762591748924323, "grad_norm": 0.16667211055755615, "learning_rate": 1.945049455799206e-05, "loss": 0.5452, "step": 1492 }, { "epoch": 0.3778790179701341, "grad_norm": 0.15130731463432312, "learning_rate": 1.9449711516953394e-05, "loss": 0.5647, "step": 1493 }, { "epoch": 0.37813211845102507, "grad_norm": 0.1445522904396057, "learning_rate": 1.9448927934182812e-05, "loss": 0.5613, "step": 1494 }, { "epoch": 0.37838521893191596, "grad_norm": 0.14771664142608643, "learning_rate": 1.9448143809725234e-05, "loss": 0.5514, "step": 1495 }, { "epoch": 0.3786383194128069, "grad_norm": 0.1448034942150116, "learning_rate": 1.9447359143625614e-05, "loss": 0.5448, "step": 1496 }, { "epoch": 0.3788914198936978, "grad_norm": 0.16282175481319427, "learning_rate": 1.9446573935928937e-05, "loss": 0.5674, "step": 1497 }, { "epoch": 0.37914452037458873, "grad_norm": 0.14873945713043213, "learning_rate": 1.9445788186680214e-05, "loss": 0.5233, "step": 1498 }, { "epoch": 0.3793976208554796, "grad_norm": 0.14670808613300323, "learning_rate": 1.9445001895924486e-05, "loss": 0.5589, "step": 1499 }, { "epoch": 0.37965072133637057, "grad_norm": 0.1473490446805954, "learning_rate": 1.944421506370684e-05, "loss": 0.5664, "step": 1500 }, { "epoch": 0.37990382181726146, "grad_norm": 0.14425857365131378, "learning_rate": 1.9443427690072377e-05, "loss": 0.5402, "step": 1501 }, { "epoch": 0.38015692229815234, "grad_norm": 0.14268651604652405, "learning_rate": 1.9442639775066235e-05, "loss": 0.5331, "step": 1502 }, { "epoch": 0.3804100227790433, "grad_norm": 0.14433452486991882, "learning_rate": 1.9441851318733586e-05, "loss": 0.5492, "step": 1503 }, { "epoch": 0.3806631232599342, "grad_norm": 0.15032480657100677, "learning_rate": 1.9441062321119628e-05, "loss": 0.5567, "step": 1504 }, { "epoch": 0.3809162237408251, "grad_norm": 0.14846478402614594, "learning_rate": 1.9440272782269595e-05, "loss": 0.5623, "step": 1505 }, { "epoch": 0.381169324221716, "grad_norm": 0.15278089046478271, "learning_rate": 1.9439482702228748e-05, "loss": 0.5611, "step": 1506 }, { "epoch": 0.38142242470260695, "grad_norm": 0.15438181161880493, "learning_rate": 1.943869208104238e-05, "loss": 0.5657, "step": 1507 }, { "epoch": 0.38167552518349784, "grad_norm": 0.14381231367588043, "learning_rate": 1.943790091875582e-05, "loss": 0.5571, "step": 1508 }, { "epoch": 0.3819286256643888, "grad_norm": 0.145586296916008, "learning_rate": 1.943710921541442e-05, "loss": 0.5621, "step": 1509 }, { "epoch": 0.3821817261452797, "grad_norm": 0.15460233390331268, "learning_rate": 1.943631697106356e-05, "loss": 0.5711, "step": 1510 }, { "epoch": 0.38243482662617057, "grad_norm": 0.14406736195087433, "learning_rate": 1.9435524185748673e-05, "loss": 0.546, "step": 1511 }, { "epoch": 0.3826879271070615, "grad_norm": 0.14487136900424957, "learning_rate": 1.9434730859515195e-05, "loss": 0.5353, "step": 1512 }, { "epoch": 0.3829410275879524, "grad_norm": 0.1524420529603958, "learning_rate": 1.9433936992408615e-05, "loss": 0.5387, "step": 1513 }, { "epoch": 0.38319412806884334, "grad_norm": 0.14421406388282776, "learning_rate": 1.943314258447443e-05, "loss": 0.5534, "step": 1514 }, { "epoch": 0.38344722854973423, "grad_norm": 0.14695259928703308, "learning_rate": 1.94323476357582e-05, "loss": 0.5437, "step": 1515 }, { "epoch": 0.3837003290306252, "grad_norm": 0.14831073582172394, "learning_rate": 1.9431552146305484e-05, "loss": 0.5444, "step": 1516 }, { "epoch": 0.38395342951151606, "grad_norm": 0.14436882734298706, "learning_rate": 1.9430756116161892e-05, "loss": 0.5358, "step": 1517 }, { "epoch": 0.384206529992407, "grad_norm": 0.15285713970661163, "learning_rate": 1.9429959545373056e-05, "loss": 0.5586, "step": 1518 }, { "epoch": 0.3844596304732979, "grad_norm": 0.14679375290870667, "learning_rate": 1.9429162433984642e-05, "loss": 0.5565, "step": 1519 }, { "epoch": 0.3847127309541888, "grad_norm": 0.14445728063583374, "learning_rate": 1.9428364782042345e-05, "loss": 0.5491, "step": 1520 }, { "epoch": 0.38496583143507973, "grad_norm": 0.14439482986927032, "learning_rate": 1.9427566589591896e-05, "loss": 0.5623, "step": 1521 }, { "epoch": 0.3852189319159706, "grad_norm": 0.14486654102802277, "learning_rate": 1.9426767856679055e-05, "loss": 0.5658, "step": 1522 }, { "epoch": 0.38547203239686156, "grad_norm": 0.14940501749515533, "learning_rate": 1.9425968583349608e-05, "loss": 0.5596, "step": 1523 }, { "epoch": 0.38572513287775245, "grad_norm": 0.14470356702804565, "learning_rate": 1.9425168769649377e-05, "loss": 0.5471, "step": 1524 }, { "epoch": 0.3859782333586434, "grad_norm": 0.1546734869480133, "learning_rate": 1.9424368415624216e-05, "loss": 0.5414, "step": 1525 }, { "epoch": 0.3862313338395343, "grad_norm": 0.14746522903442383, "learning_rate": 1.942356752132e-05, "loss": 0.5369, "step": 1526 }, { "epoch": 0.38648443432042523, "grad_norm": 0.2848914861679077, "learning_rate": 1.942276608678265e-05, "loss": 0.5377, "step": 1527 }, { "epoch": 0.3867375348013161, "grad_norm": 0.14106248319149017, "learning_rate": 1.9421964112058108e-05, "loss": 0.5719, "step": 1528 }, { "epoch": 0.38699063528220706, "grad_norm": 0.15155471861362457, "learning_rate": 1.942116159719235e-05, "loss": 0.5347, "step": 1529 }, { "epoch": 0.38724373576309795, "grad_norm": 0.14718204736709595, "learning_rate": 1.9420358542231383e-05, "loss": 0.5376, "step": 1530 }, { "epoch": 0.38749683624398884, "grad_norm": 0.14534416794776917, "learning_rate": 1.9419554947221245e-05, "loss": 0.5681, "step": 1531 }, { "epoch": 0.3877499367248798, "grad_norm": 0.15082937479019165, "learning_rate": 1.9418750812208002e-05, "loss": 0.5679, "step": 1532 }, { "epoch": 0.3880030372057707, "grad_norm": 0.14653445780277252, "learning_rate": 1.941794613723775e-05, "loss": 0.5427, "step": 1533 }, { "epoch": 0.3882561376866616, "grad_norm": 0.19520319998264313, "learning_rate": 1.9417140922356626e-05, "loss": 0.5413, "step": 1534 }, { "epoch": 0.3885092381675525, "grad_norm": 0.14684267342090607, "learning_rate": 1.9416335167610793e-05, "loss": 0.5604, "step": 1535 }, { "epoch": 0.38876233864844345, "grad_norm": 0.1453063189983368, "learning_rate": 1.9415528873046434e-05, "loss": 0.5403, "step": 1536 }, { "epoch": 0.38901543912933434, "grad_norm": 0.15310315787792206, "learning_rate": 1.941472203870978e-05, "loss": 0.5437, "step": 1537 }, { "epoch": 0.3892685396102253, "grad_norm": 0.1449703723192215, "learning_rate": 1.941391466464708e-05, "loss": 0.5481, "step": 1538 }, { "epoch": 0.3895216400911162, "grad_norm": 0.1476258635520935, "learning_rate": 1.9413106750904623e-05, "loss": 0.5554, "step": 1539 }, { "epoch": 0.38977474057200706, "grad_norm": 0.14646273851394653, "learning_rate": 1.9412298297528727e-05, "loss": 0.5382, "step": 1540 }, { "epoch": 0.390027841052898, "grad_norm": 0.14629773795604706, "learning_rate": 1.9411489304565735e-05, "loss": 0.527, "step": 1541 }, { "epoch": 0.3902809415337889, "grad_norm": 0.14340394735336304, "learning_rate": 1.941067977206202e-05, "loss": 0.5303, "step": 1542 }, { "epoch": 0.39053404201467984, "grad_norm": 0.14393474161624908, "learning_rate": 1.9409869700064e-05, "loss": 0.5402, "step": 1543 }, { "epoch": 0.3907871424955707, "grad_norm": 0.14596255123615265, "learning_rate": 1.9409059088618106e-05, "loss": 0.543, "step": 1544 }, { "epoch": 0.39104024297646167, "grad_norm": 0.14661453664302826, "learning_rate": 1.9408247937770817e-05, "loss": 0.5639, "step": 1545 }, { "epoch": 0.39129334345735256, "grad_norm": 0.1494133323431015, "learning_rate": 1.9407436247568633e-05, "loss": 0.5322, "step": 1546 }, { "epoch": 0.3915464439382435, "grad_norm": 0.15496422350406647, "learning_rate": 1.940662401805808e-05, "loss": 0.5692, "step": 1547 }, { "epoch": 0.3917995444191344, "grad_norm": 0.14746661484241486, "learning_rate": 1.940581124928573e-05, "loss": 0.5359, "step": 1548 }, { "epoch": 0.39205264490002534, "grad_norm": 0.14533816277980804, "learning_rate": 1.940499794129817e-05, "loss": 0.5512, "step": 1549 }, { "epoch": 0.3923057453809162, "grad_norm": 0.14947715401649475, "learning_rate": 1.940418409414203e-05, "loss": 0.5743, "step": 1550 }, { "epoch": 0.3925588458618071, "grad_norm": 0.14406318962574005, "learning_rate": 1.940336970786396e-05, "loss": 0.5474, "step": 1551 }, { "epoch": 0.39281194634269806, "grad_norm": 0.146559938788414, "learning_rate": 1.9402554782510657e-05, "loss": 0.5486, "step": 1552 }, { "epoch": 0.39306504682358895, "grad_norm": 0.16369974613189697, "learning_rate": 1.9401739318128832e-05, "loss": 0.5568, "step": 1553 }, { "epoch": 0.3933181473044799, "grad_norm": 0.14416536688804626, "learning_rate": 1.9400923314765235e-05, "loss": 0.5214, "step": 1554 }, { "epoch": 0.3935712477853708, "grad_norm": 0.14466746151447296, "learning_rate": 1.9400106772466645e-05, "loss": 0.5392, "step": 1555 }, { "epoch": 0.3938243482662617, "grad_norm": 0.15031152963638306, "learning_rate": 1.9399289691279874e-05, "loss": 0.5702, "step": 1556 }, { "epoch": 0.3940774487471526, "grad_norm": 0.14659929275512695, "learning_rate": 1.9398472071251765e-05, "loss": 0.554, "step": 1557 }, { "epoch": 0.39433054922804356, "grad_norm": 0.14912793040275574, "learning_rate": 1.9397653912429187e-05, "loss": 0.5275, "step": 1558 }, { "epoch": 0.39458364970893445, "grad_norm": 0.14978481829166412, "learning_rate": 1.9396835214859044e-05, "loss": 0.5502, "step": 1559 }, { "epoch": 0.39483675018982534, "grad_norm": 0.1445114016532898, "learning_rate": 1.9396015978588273e-05, "loss": 0.5567, "step": 1560 }, { "epoch": 0.3950898506707163, "grad_norm": 0.1440342217683792, "learning_rate": 1.9395196203663837e-05, "loss": 0.5358, "step": 1561 }, { "epoch": 0.39534295115160717, "grad_norm": 0.14023324847221375, "learning_rate": 1.939437589013273e-05, "loss": 0.566, "step": 1562 }, { "epoch": 0.3955960516324981, "grad_norm": 0.14542604982852936, "learning_rate": 1.939355503804198e-05, "loss": 0.5286, "step": 1563 }, { "epoch": 0.395849152113389, "grad_norm": 0.1461302787065506, "learning_rate": 1.939273364743865e-05, "loss": 0.561, "step": 1564 }, { "epoch": 0.39610225259427995, "grad_norm": 0.1431075483560562, "learning_rate": 1.939191171836982e-05, "loss": 0.5478, "step": 1565 }, { "epoch": 0.39635535307517084, "grad_norm": 0.1537792831659317, "learning_rate": 1.939108925088262e-05, "loss": 0.5173, "step": 1566 }, { "epoch": 0.3966084535560618, "grad_norm": 0.1548088788986206, "learning_rate": 1.939026624502419e-05, "loss": 0.5566, "step": 1567 }, { "epoch": 0.39686155403695267, "grad_norm": 0.14292335510253906, "learning_rate": 1.9389442700841714e-05, "loss": 0.5571, "step": 1568 }, { "epoch": 0.39711465451784356, "grad_norm": 0.14941321313381195, "learning_rate": 1.9388618618382405e-05, "loss": 0.561, "step": 1569 }, { "epoch": 0.3973677549987345, "grad_norm": 0.14457207918167114, "learning_rate": 1.938779399769351e-05, "loss": 0.561, "step": 1570 }, { "epoch": 0.3976208554796254, "grad_norm": 0.14793317019939423, "learning_rate": 1.9386968838822296e-05, "loss": 0.5394, "step": 1571 }, { "epoch": 0.39787395596051633, "grad_norm": 0.14733612537384033, "learning_rate": 1.9386143141816075e-05, "loss": 0.5519, "step": 1572 }, { "epoch": 0.3981270564414072, "grad_norm": 0.14648863673210144, "learning_rate": 1.9385316906722173e-05, "loss": 0.5532, "step": 1573 }, { "epoch": 0.39838015692229817, "grad_norm": 0.140003502368927, "learning_rate": 1.938449013358796e-05, "loss": 0.5484, "step": 1574 }, { "epoch": 0.39863325740318906, "grad_norm": 0.14673306047916412, "learning_rate": 1.9383662822460838e-05, "loss": 0.5486, "step": 1575 }, { "epoch": 0.39888635788408, "grad_norm": 0.15231101214885712, "learning_rate": 1.938283497338823e-05, "loss": 0.562, "step": 1576 }, { "epoch": 0.3991394583649709, "grad_norm": 0.14271137118339539, "learning_rate": 1.9382006586417597e-05, "loss": 0.5364, "step": 1577 }, { "epoch": 0.39939255884586183, "grad_norm": 0.15283474326133728, "learning_rate": 1.9381177661596426e-05, "loss": 0.5377, "step": 1578 }, { "epoch": 0.3996456593267527, "grad_norm": 0.1438225507736206, "learning_rate": 1.938034819897224e-05, "loss": 0.5361, "step": 1579 }, { "epoch": 0.3998987598076436, "grad_norm": 0.14464469254016876, "learning_rate": 1.9379518198592593e-05, "loss": 0.549, "step": 1580 }, { "epoch": 0.40015186028853456, "grad_norm": 0.1797109991312027, "learning_rate": 1.9378687660505063e-05, "loss": 0.5717, "step": 1581 }, { "epoch": 0.40040496076942544, "grad_norm": 0.1502915769815445, "learning_rate": 1.9377856584757258e-05, "loss": 0.5535, "step": 1582 }, { "epoch": 0.4006580612503164, "grad_norm": 0.1443057805299759, "learning_rate": 1.9377024971396835e-05, "loss": 0.5183, "step": 1583 }, { "epoch": 0.4009111617312073, "grad_norm": 0.15263494849205017, "learning_rate": 1.937619282047146e-05, "loss": 0.576, "step": 1584 }, { "epoch": 0.4011642622120982, "grad_norm": 0.14717890322208405, "learning_rate": 1.9375360132028836e-05, "loss": 0.5739, "step": 1585 }, { "epoch": 0.4014173626929891, "grad_norm": 0.144087553024292, "learning_rate": 1.9374526906116707e-05, "loss": 0.5565, "step": 1586 }, { "epoch": 0.40167046317388005, "grad_norm": 0.14913515746593475, "learning_rate": 1.9373693142782834e-05, "loss": 0.5462, "step": 1587 }, { "epoch": 0.40192356365477094, "grad_norm": 0.14807447791099548, "learning_rate": 1.9372858842075017e-05, "loss": 0.5827, "step": 1588 }, { "epoch": 0.40217666413566183, "grad_norm": 0.1502460241317749, "learning_rate": 1.9372024004041085e-05, "loss": 0.5567, "step": 1589 }, { "epoch": 0.4024297646165528, "grad_norm": 0.14845815300941467, "learning_rate": 1.9371188628728896e-05, "loss": 0.5603, "step": 1590 }, { "epoch": 0.40268286509744367, "grad_norm": 0.150599405169487, "learning_rate": 1.9370352716186346e-05, "loss": 0.5804, "step": 1591 }, { "epoch": 0.4029359655783346, "grad_norm": 0.1461457461118698, "learning_rate": 1.9369516266461348e-05, "loss": 0.5178, "step": 1592 }, { "epoch": 0.4031890660592255, "grad_norm": 0.14655934274196625, "learning_rate": 1.9368679279601855e-05, "loss": 0.5434, "step": 1593 }, { "epoch": 0.40344216654011644, "grad_norm": 0.14532841742038727, "learning_rate": 1.9367841755655856e-05, "loss": 0.5416, "step": 1594 }, { "epoch": 0.40369526702100733, "grad_norm": 0.14369773864746094, "learning_rate": 1.936700369467136e-05, "loss": 0.5253, "step": 1595 }, { "epoch": 0.4039483675018983, "grad_norm": 0.1581760197877884, "learning_rate": 1.9366165096696412e-05, "loss": 0.5493, "step": 1596 }, { "epoch": 0.40420146798278916, "grad_norm": 0.14258648455142975, "learning_rate": 1.9365325961779085e-05, "loss": 0.5264, "step": 1597 }, { "epoch": 0.4044545684636801, "grad_norm": 0.15081873536109924, "learning_rate": 1.936448628996749e-05, "loss": 0.5568, "step": 1598 }, { "epoch": 0.404707668944571, "grad_norm": 0.14594605565071106, "learning_rate": 1.9363646081309757e-05, "loss": 0.5538, "step": 1599 }, { "epoch": 0.4049607694254619, "grad_norm": 0.1677783578634262, "learning_rate": 1.936280533585406e-05, "loss": 0.5693, "step": 1600 }, { "epoch": 0.40521386990635283, "grad_norm": 0.14986838400363922, "learning_rate": 1.9361964053648594e-05, "loss": 0.5563, "step": 1601 }, { "epoch": 0.4054669703872437, "grad_norm": 0.15021882951259613, "learning_rate": 1.9361122234741585e-05, "loss": 0.5725, "step": 1602 }, { "epoch": 0.40572007086813466, "grad_norm": 0.1499585509300232, "learning_rate": 1.9360279879181294e-05, "loss": 0.5709, "step": 1603 }, { "epoch": 0.40597317134902555, "grad_norm": 0.15348757803440094, "learning_rate": 1.9359436987016016e-05, "loss": 0.5275, "step": 1604 }, { "epoch": 0.4062262718299165, "grad_norm": 0.14585711061954498, "learning_rate": 1.935859355829407e-05, "loss": 0.5352, "step": 1605 }, { "epoch": 0.4064793723108074, "grad_norm": 0.14915527403354645, "learning_rate": 1.9357749593063806e-05, "loss": 0.542, "step": 1606 }, { "epoch": 0.40673247279169833, "grad_norm": 0.15031583607196808, "learning_rate": 1.9356905091373606e-05, "loss": 0.5603, "step": 1607 }, { "epoch": 0.4069855732725892, "grad_norm": 0.14817659556865692, "learning_rate": 1.9356060053271887e-05, "loss": 0.5525, "step": 1608 }, { "epoch": 0.4072386737534801, "grad_norm": 0.14792583882808685, "learning_rate": 1.935521447880709e-05, "loss": 0.5678, "step": 1609 }, { "epoch": 0.40749177423437105, "grad_norm": 0.14879105985164642, "learning_rate": 1.9354368368027696e-05, "loss": 0.5516, "step": 1610 }, { "epoch": 0.40774487471526194, "grad_norm": 0.1414695829153061, "learning_rate": 1.93535217209822e-05, "loss": 0.5196, "step": 1611 }, { "epoch": 0.4079979751961529, "grad_norm": 0.14334243535995483, "learning_rate": 1.9352674537719155e-05, "loss": 0.5462, "step": 1612 }, { "epoch": 0.4082510756770438, "grad_norm": 0.14682495594024658, "learning_rate": 1.9351826818287107e-05, "loss": 0.5489, "step": 1613 }, { "epoch": 0.4085041761579347, "grad_norm": 0.25355473160743713, "learning_rate": 1.935097856273467e-05, "loss": 0.5499, "step": 1614 }, { "epoch": 0.4087572766388256, "grad_norm": 0.14532774686813354, "learning_rate": 1.935012977111047e-05, "loss": 0.5304, "step": 1615 }, { "epoch": 0.40901037711971655, "grad_norm": 0.14377574622631073, "learning_rate": 1.9349280443463157e-05, "loss": 0.5354, "step": 1616 }, { "epoch": 0.40926347760060744, "grad_norm": 0.15197570621967316, "learning_rate": 1.9348430579841437e-05, "loss": 0.5301, "step": 1617 }, { "epoch": 0.40951657808149833, "grad_norm": 0.15150560438632965, "learning_rate": 1.9347580180294015e-05, "loss": 0.5514, "step": 1618 }, { "epoch": 0.4097696785623893, "grad_norm": 0.14334368705749512, "learning_rate": 1.9346729244869654e-05, "loss": 0.5402, "step": 1619 }, { "epoch": 0.41002277904328016, "grad_norm": 0.1479242444038391, "learning_rate": 1.934587777361713e-05, "loss": 0.55, "step": 1620 }, { "epoch": 0.4102758795241711, "grad_norm": 0.14709392189979553, "learning_rate": 1.9345025766585258e-05, "loss": 0.5662, "step": 1621 }, { "epoch": 0.410528980005062, "grad_norm": 0.14203697443008423, "learning_rate": 1.9344173223822883e-05, "loss": 0.5651, "step": 1622 }, { "epoch": 0.41078208048595294, "grad_norm": 0.15245656669139862, "learning_rate": 1.934332014537888e-05, "loss": 0.5257, "step": 1623 }, { "epoch": 0.4110351809668438, "grad_norm": 0.14380770921707153, "learning_rate": 1.9342466531302148e-05, "loss": 0.53, "step": 1624 }, { "epoch": 0.41128828144773477, "grad_norm": 0.14402315020561218, "learning_rate": 1.9341612381641632e-05, "loss": 0.547, "step": 1625 }, { "epoch": 0.41154138192862566, "grad_norm": 0.14973874390125275, "learning_rate": 1.934075769644629e-05, "loss": 0.5315, "step": 1626 }, { "epoch": 0.4117944824095166, "grad_norm": 0.1417977660894394, "learning_rate": 1.9339902475765125e-05, "loss": 0.543, "step": 1627 }, { "epoch": 0.4120475828904075, "grad_norm": 0.14770272374153137, "learning_rate": 1.9339046719647166e-05, "loss": 0.5412, "step": 1628 }, { "epoch": 0.4123006833712984, "grad_norm": 0.14769352972507477, "learning_rate": 1.9338190428141463e-05, "loss": 0.533, "step": 1629 }, { "epoch": 0.4125537838521893, "grad_norm": 0.14572639763355255, "learning_rate": 1.9337333601297118e-05, "loss": 0.5772, "step": 1630 }, { "epoch": 0.4128068843330802, "grad_norm": 0.14324599504470825, "learning_rate": 1.933647623916324e-05, "loss": 0.5535, "step": 1631 }, { "epoch": 0.41305998481397116, "grad_norm": 0.14937616884708405, "learning_rate": 1.9335618341788983e-05, "loss": 0.5634, "step": 1632 }, { "epoch": 0.41331308529486205, "grad_norm": 0.14965523779392242, "learning_rate": 1.9334759909223534e-05, "loss": 0.5319, "step": 1633 }, { "epoch": 0.413566185775753, "grad_norm": 0.1447480022907257, "learning_rate": 1.93339009415161e-05, "loss": 0.5188, "step": 1634 }, { "epoch": 0.4138192862566439, "grad_norm": 0.14657054841518402, "learning_rate": 1.933304143871592e-05, "loss": 0.545, "step": 1635 }, { "epoch": 0.4140723867375348, "grad_norm": 0.15385763347148895, "learning_rate": 1.9332181400872273e-05, "loss": 0.5667, "step": 1636 }, { "epoch": 0.4143254872184257, "grad_norm": 0.14401037991046906, "learning_rate": 1.9331320828034466e-05, "loss": 0.5383, "step": 1637 }, { "epoch": 0.4145785876993166, "grad_norm": 0.15646244585514069, "learning_rate": 1.933045972025183e-05, "loss": 0.5537, "step": 1638 }, { "epoch": 0.41483168818020755, "grad_norm": 0.15087704360485077, "learning_rate": 1.9329598077573727e-05, "loss": 0.543, "step": 1639 }, { "epoch": 0.41508478866109844, "grad_norm": 0.1671897917985916, "learning_rate": 1.932873590004956e-05, "loss": 0.5443, "step": 1640 }, { "epoch": 0.4153378891419894, "grad_norm": 0.1444534808397293, "learning_rate": 1.9327873187728747e-05, "loss": 0.5732, "step": 1641 }, { "epoch": 0.41559098962288027, "grad_norm": 0.1481046974658966, "learning_rate": 1.9327009940660755e-05, "loss": 0.5425, "step": 1642 }, { "epoch": 0.4158440901037712, "grad_norm": 0.19806204736232758, "learning_rate": 1.9326146158895067e-05, "loss": 0.5439, "step": 1643 }, { "epoch": 0.4160971905846621, "grad_norm": 0.1478814035654068, "learning_rate": 1.9325281842481206e-05, "loss": 0.5357, "step": 1644 }, { "epoch": 0.41635029106555305, "grad_norm": 0.15157166123390198, "learning_rate": 1.9324416991468712e-05, "loss": 0.5549, "step": 1645 }, { "epoch": 0.41660339154644394, "grad_norm": 0.15019840002059937, "learning_rate": 1.9323551605907175e-05, "loss": 0.5526, "step": 1646 }, { "epoch": 0.4168564920273349, "grad_norm": 0.1437724530696869, "learning_rate": 1.9322685685846202e-05, "loss": 0.5212, "step": 1647 }, { "epoch": 0.41710959250822577, "grad_norm": 0.15244163572788239, "learning_rate": 1.9321819231335435e-05, "loss": 0.5564, "step": 1648 }, { "epoch": 0.41736269298911666, "grad_norm": 0.15348710119724274, "learning_rate": 1.9320952242424542e-05, "loss": 0.5519, "step": 1649 }, { "epoch": 0.4176157934700076, "grad_norm": 0.14565591514110565, "learning_rate": 1.9320084719163232e-05, "loss": 0.5734, "step": 1650 }, { "epoch": 0.4178688939508985, "grad_norm": 0.15512077510356903, "learning_rate": 1.9319216661601233e-05, "loss": 0.5552, "step": 1651 }, { "epoch": 0.41812199443178943, "grad_norm": 0.15159796178340912, "learning_rate": 1.9318348069788314e-05, "loss": 0.5572, "step": 1652 }, { "epoch": 0.4183750949126803, "grad_norm": 0.1493181437253952, "learning_rate": 1.9317478943774263e-05, "loss": 0.553, "step": 1653 }, { "epoch": 0.41862819539357127, "grad_norm": 0.14635220170021057, "learning_rate": 1.931660928360891e-05, "loss": 0.521, "step": 1654 }, { "epoch": 0.41888129587446216, "grad_norm": 0.14356295764446259, "learning_rate": 1.931573908934211e-05, "loss": 0.5318, "step": 1655 }, { "epoch": 0.4191343963553531, "grad_norm": 0.1549730747938156, "learning_rate": 1.931486836102375e-05, "loss": 0.5262, "step": 1656 }, { "epoch": 0.419387496836244, "grad_norm": 0.1705036610364914, "learning_rate": 1.9313997098703747e-05, "loss": 0.5646, "step": 1657 }, { "epoch": 0.4196405973171349, "grad_norm": 0.14875201880931854, "learning_rate": 1.9313125302432045e-05, "loss": 0.5448, "step": 1658 }, { "epoch": 0.4198936977980258, "grad_norm": 0.16302737593650818, "learning_rate": 1.9312252972258624e-05, "loss": 0.5347, "step": 1659 }, { "epoch": 0.4201467982789167, "grad_norm": 0.1432584524154663, "learning_rate": 1.9311380108233495e-05, "loss": 0.533, "step": 1660 }, { "epoch": 0.42039989875980766, "grad_norm": 0.15105532109737396, "learning_rate": 1.9310506710406696e-05, "loss": 0.5633, "step": 1661 }, { "epoch": 0.42065299924069854, "grad_norm": 0.17110736668109894, "learning_rate": 1.93096327788283e-05, "loss": 0.5322, "step": 1662 }, { "epoch": 0.4209060997215895, "grad_norm": 0.1431974321603775, "learning_rate": 1.9308758313548397e-05, "loss": 0.5432, "step": 1663 }, { "epoch": 0.4211592002024804, "grad_norm": 0.14605098962783813, "learning_rate": 1.9307883314617136e-05, "loss": 0.5548, "step": 1664 }, { "epoch": 0.4214123006833713, "grad_norm": 0.19013462960720062, "learning_rate": 1.9307007782084662e-05, "loss": 0.5568, "step": 1665 }, { "epoch": 0.4216654011642622, "grad_norm": 0.15632565319538116, "learning_rate": 1.9306131716001175e-05, "loss": 0.5494, "step": 1666 }, { "epoch": 0.4219185016451531, "grad_norm": 0.1458219289779663, "learning_rate": 1.93052551164169e-05, "loss": 0.5526, "step": 1667 }, { "epoch": 0.42217160212604404, "grad_norm": 0.14812980592250824, "learning_rate": 1.9304377983382085e-05, "loss": 0.5425, "step": 1668 }, { "epoch": 0.42242470260693493, "grad_norm": 0.14942355453968048, "learning_rate": 1.930350031694702e-05, "loss": 0.5632, "step": 1669 }, { "epoch": 0.4226778030878259, "grad_norm": 0.1527610868215561, "learning_rate": 1.9302622117162015e-05, "loss": 0.5406, "step": 1670 }, { "epoch": 0.42293090356871677, "grad_norm": 0.15162545442581177, "learning_rate": 1.9301743384077416e-05, "loss": 0.5474, "step": 1671 }, { "epoch": 0.4231840040496077, "grad_norm": 0.1516416221857071, "learning_rate": 1.9300864117743602e-05, "loss": 0.5354, "step": 1672 }, { "epoch": 0.4234371045304986, "grad_norm": 0.14847074449062347, "learning_rate": 1.9299984318210977e-05, "loss": 0.5454, "step": 1673 }, { "epoch": 0.42369020501138954, "grad_norm": 0.15890692174434662, "learning_rate": 1.9299103985529977e-05, "loss": 0.5702, "step": 1674 }, { "epoch": 0.42394330549228043, "grad_norm": 0.15740667283535004, "learning_rate": 1.9298223119751076e-05, "loss": 0.5293, "step": 1675 }, { "epoch": 0.4241964059731714, "grad_norm": 0.15397818386554718, "learning_rate": 1.9297341720924762e-05, "loss": 0.5496, "step": 1676 }, { "epoch": 0.42444950645406226, "grad_norm": 0.15494826436042786, "learning_rate": 1.9296459789101574e-05, "loss": 0.5804, "step": 1677 }, { "epoch": 0.42470260693495315, "grad_norm": 0.14730721712112427, "learning_rate": 1.9295577324332062e-05, "loss": 0.5443, "step": 1678 }, { "epoch": 0.4249557074158441, "grad_norm": 0.1446990668773651, "learning_rate": 1.929469432666682e-05, "loss": 0.5542, "step": 1679 }, { "epoch": 0.425208807896735, "grad_norm": 0.14975771307945251, "learning_rate": 1.9293810796156475e-05, "loss": 0.5316, "step": 1680 }, { "epoch": 0.42546190837762593, "grad_norm": 0.14780595898628235, "learning_rate": 1.9292926732851663e-05, "loss": 0.5337, "step": 1681 }, { "epoch": 0.4257150088585168, "grad_norm": 0.14161317050457, "learning_rate": 1.929204213680308e-05, "loss": 0.517, "step": 1682 }, { "epoch": 0.42596810933940776, "grad_norm": 0.14389091730117798, "learning_rate": 1.929115700806143e-05, "loss": 0.5502, "step": 1683 }, { "epoch": 0.42622120982029865, "grad_norm": 0.1470528095960617, "learning_rate": 1.9290271346677458e-05, "loss": 0.5359, "step": 1684 }, { "epoch": 0.4264743103011896, "grad_norm": 0.1421509087085724, "learning_rate": 1.9289385152701935e-05, "loss": 0.5495, "step": 1685 }, { "epoch": 0.4267274107820805, "grad_norm": 0.1501965969800949, "learning_rate": 1.9288498426185665e-05, "loss": 0.5387, "step": 1686 }, { "epoch": 0.4269805112629714, "grad_norm": 0.14985966682434082, "learning_rate": 1.9287611167179484e-05, "loss": 0.5687, "step": 1687 }, { "epoch": 0.4272336117438623, "grad_norm": 0.14769534766674042, "learning_rate": 1.928672337573426e-05, "loss": 0.5509, "step": 1688 }, { "epoch": 0.4274867122247532, "grad_norm": 0.14639385044574738, "learning_rate": 1.9285835051900883e-05, "loss": 0.5462, "step": 1689 }, { "epoch": 0.42773981270564415, "grad_norm": 0.14832186698913574, "learning_rate": 1.9284946195730278e-05, "loss": 0.5421, "step": 1690 }, { "epoch": 0.42799291318653504, "grad_norm": 0.14823070168495178, "learning_rate": 1.9284056807273404e-05, "loss": 0.5426, "step": 1691 }, { "epoch": 0.428246013667426, "grad_norm": 0.14643365144729614, "learning_rate": 1.9283166886581247e-05, "loss": 0.5626, "step": 1692 }, { "epoch": 0.4284991141483169, "grad_norm": 0.1499713808298111, "learning_rate": 1.9282276433704824e-05, "loss": 0.5682, "step": 1693 }, { "epoch": 0.4287522146292078, "grad_norm": 0.1642482578754425, "learning_rate": 1.9281385448695182e-05, "loss": 0.5255, "step": 1694 }, { "epoch": 0.4290053151100987, "grad_norm": 0.14676684141159058, "learning_rate": 1.9280493931603404e-05, "loss": 0.5685, "step": 1695 }, { "epoch": 0.42925841559098965, "grad_norm": 0.1522381454706192, "learning_rate": 1.9279601882480592e-05, "loss": 0.5644, "step": 1696 }, { "epoch": 0.42951151607188054, "grad_norm": 0.14444655179977417, "learning_rate": 1.927870930137789e-05, "loss": 0.5436, "step": 1697 }, { "epoch": 0.42976461655277143, "grad_norm": 0.14501915872097015, "learning_rate": 1.9277816188346464e-05, "loss": 0.5218, "step": 1698 }, { "epoch": 0.43001771703366237, "grad_norm": 0.14758257567882538, "learning_rate": 1.9276922543437516e-05, "loss": 0.5536, "step": 1699 }, { "epoch": 0.43027081751455326, "grad_norm": 0.150035098195076, "learning_rate": 1.927602836670228e-05, "loss": 0.5363, "step": 1700 }, { "epoch": 0.4305239179954442, "grad_norm": 0.13837407529354095, "learning_rate": 1.9275133658192015e-05, "loss": 0.5222, "step": 1701 }, { "epoch": 0.4307770184763351, "grad_norm": 0.15080954134464264, "learning_rate": 1.927423841795801e-05, "loss": 0.5429, "step": 1702 }, { "epoch": 0.43103011895722604, "grad_norm": 0.15365765988826752, "learning_rate": 1.927334264605159e-05, "loss": 0.5326, "step": 1703 }, { "epoch": 0.4312832194381169, "grad_norm": 0.159660205245018, "learning_rate": 1.9272446342524106e-05, "loss": 0.5433, "step": 1704 }, { "epoch": 0.43153631991900787, "grad_norm": 0.15141738951206207, "learning_rate": 1.9271549507426943e-05, "loss": 0.5516, "step": 1705 }, { "epoch": 0.43178942039989876, "grad_norm": 0.1540350466966629, "learning_rate": 1.9270652140811513e-05, "loss": 0.5618, "step": 1706 }, { "epoch": 0.43204252088078965, "grad_norm": 0.14833320677280426, "learning_rate": 1.9269754242729265e-05, "loss": 0.5541, "step": 1707 }, { "epoch": 0.4322956213616806, "grad_norm": 0.1516059786081314, "learning_rate": 1.9268855813231665e-05, "loss": 0.5626, "step": 1708 }, { "epoch": 0.4325487218425715, "grad_norm": 0.1475950926542282, "learning_rate": 1.9267956852370226e-05, "loss": 0.5293, "step": 1709 }, { "epoch": 0.4328018223234624, "grad_norm": 0.1647576540708542, "learning_rate": 1.9267057360196478e-05, "loss": 0.5369, "step": 1710 }, { "epoch": 0.4330549228043533, "grad_norm": 0.14087119698524475, "learning_rate": 1.926615733676199e-05, "loss": 0.5426, "step": 1711 }, { "epoch": 0.43330802328524426, "grad_norm": 0.1437087506055832, "learning_rate": 1.926525678211836e-05, "loss": 0.5482, "step": 1712 }, { "epoch": 0.43356112376613515, "grad_norm": 0.17049936950206757, "learning_rate": 1.926435569631721e-05, "loss": 0.5395, "step": 1713 }, { "epoch": 0.4338142242470261, "grad_norm": 0.16222238540649414, "learning_rate": 1.92634540794102e-05, "loss": 0.568, "step": 1714 }, { "epoch": 0.434067324727917, "grad_norm": 0.14295589923858643, "learning_rate": 1.926255193144902e-05, "loss": 0.5175, "step": 1715 }, { "epoch": 0.43432042520880787, "grad_norm": 0.1490984857082367, "learning_rate": 1.9261649252485383e-05, "loss": 0.5408, "step": 1716 }, { "epoch": 0.4345735256896988, "grad_norm": 0.15359872579574585, "learning_rate": 1.9260746042571038e-05, "loss": 0.5514, "step": 1717 }, { "epoch": 0.4348266261705897, "grad_norm": 0.14625461399555206, "learning_rate": 1.925984230175777e-05, "loss": 0.5183, "step": 1718 }, { "epoch": 0.43507972665148065, "grad_norm": 0.1523420363664627, "learning_rate": 1.9258938030097388e-05, "loss": 0.5517, "step": 1719 }, { "epoch": 0.43533282713237154, "grad_norm": 0.14779441058635712, "learning_rate": 1.9258033227641725e-05, "loss": 0.5505, "step": 1720 }, { "epoch": 0.4355859276132625, "grad_norm": 0.14362023770809174, "learning_rate": 1.9257127894442658e-05, "loss": 0.5055, "step": 1721 }, { "epoch": 0.43583902809415337, "grad_norm": 0.14773982763290405, "learning_rate": 1.9256222030552086e-05, "loss": 0.5672, "step": 1722 }, { "epoch": 0.4360921285750443, "grad_norm": 0.14201810956001282, "learning_rate": 1.9255315636021935e-05, "loss": 0.5279, "step": 1723 }, { "epoch": 0.4363452290559352, "grad_norm": 0.1453840434551239, "learning_rate": 1.9254408710904177e-05, "loss": 0.535, "step": 1724 }, { "epoch": 0.43659832953682615, "grad_norm": 0.15022654831409454, "learning_rate": 1.9253501255250794e-05, "loss": 0.5481, "step": 1725 }, { "epoch": 0.43685143001771704, "grad_norm": 0.14555461704730988, "learning_rate": 1.9252593269113816e-05, "loss": 0.5605, "step": 1726 }, { "epoch": 0.4371045304986079, "grad_norm": 0.16268178820610046, "learning_rate": 1.925168475254529e-05, "loss": 0.5463, "step": 1727 }, { "epoch": 0.43735763097949887, "grad_norm": 0.1464332938194275, "learning_rate": 1.9250775705597307e-05, "loss": 0.5602, "step": 1728 }, { "epoch": 0.43761073146038976, "grad_norm": 0.1467064619064331, "learning_rate": 1.9249866128321972e-05, "loss": 0.5525, "step": 1729 }, { "epoch": 0.4378638319412807, "grad_norm": 0.17261680960655212, "learning_rate": 1.9248956020771434e-05, "loss": 0.5433, "step": 1730 }, { "epoch": 0.4381169324221716, "grad_norm": 0.15159925818443298, "learning_rate": 1.9248045382997866e-05, "loss": 0.5539, "step": 1731 }, { "epoch": 0.43837003290306253, "grad_norm": 0.1457923799753189, "learning_rate": 1.9247134215053477e-05, "loss": 0.549, "step": 1732 }, { "epoch": 0.4386231333839534, "grad_norm": 0.14411090314388275, "learning_rate": 1.9246222516990495e-05, "loss": 0.5325, "step": 1733 }, { "epoch": 0.43887623386484437, "grad_norm": 0.1534961760044098, "learning_rate": 1.924531028886119e-05, "loss": 0.5479, "step": 1734 }, { "epoch": 0.43912933434573526, "grad_norm": 0.1493685394525528, "learning_rate": 1.924439753071786e-05, "loss": 0.5513, "step": 1735 }, { "epoch": 0.43938243482662614, "grad_norm": 0.14418727159500122, "learning_rate": 1.9243484242612827e-05, "loss": 0.5372, "step": 1736 }, { "epoch": 0.4396355353075171, "grad_norm": 0.14740322530269623, "learning_rate": 1.924257042459845e-05, "loss": 0.5319, "step": 1737 }, { "epoch": 0.439888635788408, "grad_norm": 0.14894923567771912, "learning_rate": 1.9241656076727116e-05, "loss": 0.5773, "step": 1738 }, { "epoch": 0.4401417362692989, "grad_norm": 0.14886252582073212, "learning_rate": 1.9240741199051244e-05, "loss": 0.5588, "step": 1739 }, { "epoch": 0.4403948367501898, "grad_norm": 0.14675700664520264, "learning_rate": 1.923982579162328e-05, "loss": 0.5579, "step": 1740 }, { "epoch": 0.44064793723108076, "grad_norm": 0.15053215622901917, "learning_rate": 1.9238909854495705e-05, "loss": 0.5507, "step": 1741 }, { "epoch": 0.44090103771197164, "grad_norm": 0.1464221179485321, "learning_rate": 1.9237993387721026e-05, "loss": 0.5213, "step": 1742 }, { "epoch": 0.4411541381928626, "grad_norm": 0.15776830911636353, "learning_rate": 1.923707639135178e-05, "loss": 0.5412, "step": 1743 }, { "epoch": 0.4414072386737535, "grad_norm": 0.14986103773117065, "learning_rate": 1.923615886544054e-05, "loss": 0.5156, "step": 1744 }, { "epoch": 0.4416603391546444, "grad_norm": 0.16009564697742462, "learning_rate": 1.9235240810039903e-05, "loss": 0.529, "step": 1745 }, { "epoch": 0.4419134396355353, "grad_norm": 0.1472238302230835, "learning_rate": 1.9234322225202503e-05, "loss": 0.5621, "step": 1746 }, { "epoch": 0.4421665401164262, "grad_norm": 0.1461733728647232, "learning_rate": 1.9233403110980997e-05, "loss": 0.5306, "step": 1747 }, { "epoch": 0.44241964059731714, "grad_norm": 0.15014013648033142, "learning_rate": 1.9232483467428077e-05, "loss": 0.5384, "step": 1748 }, { "epoch": 0.44267274107820803, "grad_norm": 0.14777792990207672, "learning_rate": 1.9231563294596466e-05, "loss": 0.5613, "step": 1749 }, { "epoch": 0.442925841559099, "grad_norm": 0.15301355719566345, "learning_rate": 1.923064259253891e-05, "loss": 0.5542, "step": 1750 }, { "epoch": 0.44317894203998986, "grad_norm": 0.16541852056980133, "learning_rate": 1.9229721361308196e-05, "loss": 0.5659, "step": 1751 }, { "epoch": 0.4434320425208808, "grad_norm": 0.15273579955101013, "learning_rate": 1.9228799600957132e-05, "loss": 0.5443, "step": 1752 }, { "epoch": 0.4436851430017717, "grad_norm": 0.14688630402088165, "learning_rate": 1.922787731153857e-05, "loss": 0.569, "step": 1753 }, { "epoch": 0.44393824348266264, "grad_norm": 0.14831644296646118, "learning_rate": 1.922695449310537e-05, "loss": 0.5373, "step": 1754 }, { "epoch": 0.44419134396355353, "grad_norm": 0.1455741822719574, "learning_rate": 1.9226031145710443e-05, "loss": 0.5579, "step": 1755 }, { "epoch": 0.4444444444444444, "grad_norm": 0.1440512090921402, "learning_rate": 1.9225107269406726e-05, "loss": 0.539, "step": 1756 }, { "epoch": 0.44469754492533536, "grad_norm": 0.15093894302845, "learning_rate": 1.9224182864247173e-05, "loss": 0.5554, "step": 1757 }, { "epoch": 0.44495064540622625, "grad_norm": 0.14180652797222137, "learning_rate": 1.9223257930284785e-05, "loss": 0.5351, "step": 1758 }, { "epoch": 0.4452037458871172, "grad_norm": 0.16017498075962067, "learning_rate": 1.9222332467572583e-05, "loss": 0.5538, "step": 1759 }, { "epoch": 0.4454568463680081, "grad_norm": 0.14740543067455292, "learning_rate": 1.9221406476163625e-05, "loss": 0.5569, "step": 1760 }, { "epoch": 0.44570994684889903, "grad_norm": 0.14455154538154602, "learning_rate": 1.9220479956110997e-05, "loss": 0.5332, "step": 1761 }, { "epoch": 0.4459630473297899, "grad_norm": 0.14225231111049652, "learning_rate": 1.9219552907467806e-05, "loss": 0.552, "step": 1762 }, { "epoch": 0.44621614781068086, "grad_norm": 0.1488070785999298, "learning_rate": 1.9218625330287207e-05, "loss": 0.5376, "step": 1763 }, { "epoch": 0.44646924829157175, "grad_norm": 0.1501726508140564, "learning_rate": 1.9217697224622373e-05, "loss": 0.5524, "step": 1764 }, { "epoch": 0.44672234877246264, "grad_norm": 0.14537248015403748, "learning_rate": 1.921676859052651e-05, "loss": 0.5589, "step": 1765 }, { "epoch": 0.4469754492533536, "grad_norm": 0.1483267843723297, "learning_rate": 1.9215839428052853e-05, "loss": 0.5697, "step": 1766 }, { "epoch": 0.4472285497342445, "grad_norm": 0.15355317294597626, "learning_rate": 1.921490973725467e-05, "loss": 0.5543, "step": 1767 }, { "epoch": 0.4474816502151354, "grad_norm": 0.15834340453147888, "learning_rate": 1.9213979518185265e-05, "loss": 0.5474, "step": 1768 }, { "epoch": 0.4477347506960263, "grad_norm": 0.14593762159347534, "learning_rate": 1.921304877089795e-05, "loss": 0.5595, "step": 1769 }, { "epoch": 0.44798785117691725, "grad_norm": 0.1434435397386551, "learning_rate": 1.92121174954461e-05, "loss": 0.5335, "step": 1770 }, { "epoch": 0.44824095165780814, "grad_norm": 0.19594144821166992, "learning_rate": 1.9211185691883094e-05, "loss": 0.5765, "step": 1771 }, { "epoch": 0.4484940521386991, "grad_norm": 0.14419157803058624, "learning_rate": 1.9210253360262346e-05, "loss": 0.533, "step": 1772 }, { "epoch": 0.44874715261959, "grad_norm": 0.1428258717060089, "learning_rate": 1.9209320500637316e-05, "loss": 0.5276, "step": 1773 }, { "epoch": 0.4490002531004809, "grad_norm": 0.1456083357334137, "learning_rate": 1.9208387113061475e-05, "loss": 0.5512, "step": 1774 }, { "epoch": 0.4492533535813718, "grad_norm": 0.14498069882392883, "learning_rate": 1.9207453197588335e-05, "loss": 0.5324, "step": 1775 }, { "epoch": 0.4495064540622627, "grad_norm": 0.14620797336101532, "learning_rate": 1.9206518754271434e-05, "loss": 0.5346, "step": 1776 }, { "epoch": 0.44975955454315364, "grad_norm": 0.1486203372478485, "learning_rate": 1.920558378316434e-05, "loss": 0.5358, "step": 1777 }, { "epoch": 0.45001265502404453, "grad_norm": 0.17899760603904724, "learning_rate": 1.9204648284320656e-05, "loss": 0.5691, "step": 1778 }, { "epoch": 0.45026575550493547, "grad_norm": 0.15098373591899872, "learning_rate": 1.9203712257794013e-05, "loss": 0.559, "step": 1779 }, { "epoch": 0.45051885598582636, "grad_norm": 0.14738845825195312, "learning_rate": 1.9202775703638066e-05, "loss": 0.5347, "step": 1780 }, { "epoch": 0.4507719564667173, "grad_norm": 0.1463220864534378, "learning_rate": 1.920183862190651e-05, "loss": 0.5438, "step": 1781 }, { "epoch": 0.4510250569476082, "grad_norm": 0.14913417398929596, "learning_rate": 1.9200901012653067e-05, "loss": 0.5127, "step": 1782 }, { "epoch": 0.45127815742849914, "grad_norm": 0.143700510263443, "learning_rate": 1.9199962875931487e-05, "loss": 0.5456, "step": 1783 }, { "epoch": 0.45153125790939, "grad_norm": 0.14566923677921295, "learning_rate": 1.919902421179555e-05, "loss": 0.5535, "step": 1784 }, { "epoch": 0.4517843583902809, "grad_norm": 0.16239500045776367, "learning_rate": 1.9198085020299067e-05, "loss": 0.5576, "step": 1785 }, { "epoch": 0.45203745887117186, "grad_norm": 0.1463373899459839, "learning_rate": 1.919714530149588e-05, "loss": 0.5507, "step": 1786 }, { "epoch": 0.45229055935206275, "grad_norm": 0.15224839746952057, "learning_rate": 1.9196205055439866e-05, "loss": 0.53, "step": 1787 }, { "epoch": 0.4525436598329537, "grad_norm": 0.18018022179603577, "learning_rate": 1.9195264282184924e-05, "loss": 0.5549, "step": 1788 }, { "epoch": 0.4527967603138446, "grad_norm": 0.14686517417430878, "learning_rate": 1.919432298178498e-05, "loss": 0.5316, "step": 1789 }, { "epoch": 0.4530498607947355, "grad_norm": 0.15180079638957977, "learning_rate": 1.9193381154294007e-05, "loss": 0.5711, "step": 1790 }, { "epoch": 0.4533029612756264, "grad_norm": 0.14719291031360626, "learning_rate": 1.9192438799765995e-05, "loss": 0.5459, "step": 1791 }, { "epoch": 0.45355606175651736, "grad_norm": 0.1472300887107849, "learning_rate": 1.9191495918254964e-05, "loss": 0.5458, "step": 1792 }, { "epoch": 0.45380916223740825, "grad_norm": 0.14407874643802643, "learning_rate": 1.919055250981497e-05, "loss": 0.5088, "step": 1793 }, { "epoch": 0.4540622627182992, "grad_norm": 0.16175606846809387, "learning_rate": 1.9189608574500095e-05, "loss": 0.5237, "step": 1794 }, { "epoch": 0.4543153631991901, "grad_norm": 0.14462268352508545, "learning_rate": 1.9188664112364456e-05, "loss": 0.5458, "step": 1795 }, { "epoch": 0.45456846368008097, "grad_norm": 0.14778609573841095, "learning_rate": 1.9187719123462192e-05, "loss": 0.5306, "step": 1796 }, { "epoch": 0.4548215641609719, "grad_norm": 0.14949549734592438, "learning_rate": 1.9186773607847482e-05, "loss": 0.5617, "step": 1797 }, { "epoch": 0.4550746646418628, "grad_norm": 0.19620174169540405, "learning_rate": 1.9185827565574526e-05, "loss": 0.5321, "step": 1798 }, { "epoch": 0.45532776512275375, "grad_norm": 0.1574760526418686, "learning_rate": 1.9184880996697562e-05, "loss": 0.5657, "step": 1799 }, { "epoch": 0.45558086560364464, "grad_norm": 0.14064092934131622, "learning_rate": 1.9183933901270857e-05, "loss": 0.5031, "step": 1800 }, { "epoch": 0.4558339660845356, "grad_norm": 0.14611414074897766, "learning_rate": 1.9182986279348702e-05, "loss": 0.5557, "step": 1801 }, { "epoch": 0.45608706656542647, "grad_norm": 0.1446884572505951, "learning_rate": 1.918203813098542e-05, "loss": 0.5143, "step": 1802 }, { "epoch": 0.4563401670463174, "grad_norm": 0.14710043370723724, "learning_rate": 1.9181089456235373e-05, "loss": 0.5441, "step": 1803 }, { "epoch": 0.4565932675272083, "grad_norm": 0.14978434145450592, "learning_rate": 1.918014025515294e-05, "loss": 0.5581, "step": 1804 }, { "epoch": 0.4568463680080992, "grad_norm": 0.1453111171722412, "learning_rate": 1.917919052779254e-05, "loss": 0.5704, "step": 1805 }, { "epoch": 0.45709946848899013, "grad_norm": 0.1428987681865692, "learning_rate": 1.9178240274208616e-05, "loss": 0.5422, "step": 1806 }, { "epoch": 0.457352568969881, "grad_norm": 0.14089812338352203, "learning_rate": 1.917728949445565e-05, "loss": 0.5229, "step": 1807 }, { "epoch": 0.45760566945077197, "grad_norm": 0.15608355402946472, "learning_rate": 1.917633818858814e-05, "loss": 0.5376, "step": 1808 }, { "epoch": 0.45785876993166286, "grad_norm": 0.14855574071407318, "learning_rate": 1.9175386356660632e-05, "loss": 0.5576, "step": 1809 }, { "epoch": 0.4581118704125538, "grad_norm": 0.15267117321491241, "learning_rate": 1.917443399872769e-05, "loss": 0.5298, "step": 1810 }, { "epoch": 0.4583649708934447, "grad_norm": 0.14579527080059052, "learning_rate": 1.9173481114843898e-05, "loss": 0.5365, "step": 1811 }, { "epoch": 0.45861807137433563, "grad_norm": 0.15329217910766602, "learning_rate": 1.91725277050639e-05, "loss": 0.5436, "step": 1812 }, { "epoch": 0.4588711718552265, "grad_norm": 0.14536508917808533, "learning_rate": 1.9171573769442348e-05, "loss": 0.5544, "step": 1813 }, { "epoch": 0.4591242723361174, "grad_norm": 0.147845059633255, "learning_rate": 1.917061930803392e-05, "loss": 0.528, "step": 1814 }, { "epoch": 0.45937737281700836, "grad_norm": 0.14528067409992218, "learning_rate": 1.9169664320893345e-05, "loss": 0.5697, "step": 1815 }, { "epoch": 0.45963047329789924, "grad_norm": 0.15772676467895508, "learning_rate": 1.916870880807536e-05, "loss": 0.5549, "step": 1816 }, { "epoch": 0.4598835737787902, "grad_norm": 0.1482834368944168, "learning_rate": 1.9167752769634754e-05, "loss": 0.5628, "step": 1817 }, { "epoch": 0.4601366742596811, "grad_norm": 0.1466667354106903, "learning_rate": 1.9166796205626328e-05, "loss": 0.5327, "step": 1818 }, { "epoch": 0.460389774740572, "grad_norm": 0.14505578577518463, "learning_rate": 1.916583911610492e-05, "loss": 0.5499, "step": 1819 }, { "epoch": 0.4606428752214629, "grad_norm": 0.1450030505657196, "learning_rate": 1.9164881501125398e-05, "loss": 0.5536, "step": 1820 }, { "epoch": 0.46089597570235386, "grad_norm": 0.1456303894519806, "learning_rate": 1.916392336074266e-05, "loss": 0.5356, "step": 1821 }, { "epoch": 0.46114907618324474, "grad_norm": 0.1493786871433258, "learning_rate": 1.9162964695011635e-05, "loss": 0.5576, "step": 1822 }, { "epoch": 0.4614021766641357, "grad_norm": 0.14639952778816223, "learning_rate": 1.916200550398728e-05, "loss": 0.5622, "step": 1823 }, { "epoch": 0.4616552771450266, "grad_norm": 0.14597994089126587, "learning_rate": 1.916104578772459e-05, "loss": 0.5695, "step": 1824 }, { "epoch": 0.46190837762591747, "grad_norm": 0.15567152202129364, "learning_rate": 1.916008554627857e-05, "loss": 0.5363, "step": 1825 }, { "epoch": 0.4621614781068084, "grad_norm": 0.14252327382564545, "learning_rate": 1.915912477970428e-05, "loss": 0.5514, "step": 1826 }, { "epoch": 0.4624145785876993, "grad_norm": 0.14835482835769653, "learning_rate": 1.9158163488056794e-05, "loss": 0.5464, "step": 1827 }, { "epoch": 0.46266767906859024, "grad_norm": 0.1697893738746643, "learning_rate": 1.9157201671391222e-05, "loss": 0.5105, "step": 1828 }, { "epoch": 0.46292077954948113, "grad_norm": 0.14517392218112946, "learning_rate": 1.91562393297627e-05, "loss": 0.5348, "step": 1829 }, { "epoch": 0.4631738800303721, "grad_norm": 0.1532156765460968, "learning_rate": 1.9155276463226405e-05, "loss": 0.5373, "step": 1830 }, { "epoch": 0.46342698051126296, "grad_norm": 0.15020602941513062, "learning_rate": 1.915431307183753e-05, "loss": 0.5449, "step": 1831 }, { "epoch": 0.4636800809921539, "grad_norm": 0.15006892383098602, "learning_rate": 1.9153349155651305e-05, "loss": 0.5433, "step": 1832 }, { "epoch": 0.4639331814730448, "grad_norm": 0.15749923884868622, "learning_rate": 1.915238471472299e-05, "loss": 0.5485, "step": 1833 }, { "epoch": 0.4641862819539357, "grad_norm": 0.1460724174976349, "learning_rate": 1.915141974910787e-05, "loss": 0.5582, "step": 1834 }, { "epoch": 0.46443938243482663, "grad_norm": 0.15296821296215057, "learning_rate": 1.915045425886127e-05, "loss": 0.5241, "step": 1835 }, { "epoch": 0.4646924829157175, "grad_norm": 0.1486324518918991, "learning_rate": 1.914948824403854e-05, "loss": 0.542, "step": 1836 }, { "epoch": 0.46494558339660846, "grad_norm": 0.16442471742630005, "learning_rate": 1.9148521704695056e-05, "loss": 0.5169, "step": 1837 }, { "epoch": 0.46519868387749935, "grad_norm": 0.14902648329734802, "learning_rate": 1.914755464088623e-05, "loss": 0.5619, "step": 1838 }, { "epoch": 0.4654517843583903, "grad_norm": 0.17024584114551544, "learning_rate": 1.9146587052667497e-05, "loss": 0.5473, "step": 1839 }, { "epoch": 0.4657048848392812, "grad_norm": 0.1540568619966507, "learning_rate": 1.9145618940094334e-05, "loss": 0.5338, "step": 1840 }, { "epoch": 0.46595798532017213, "grad_norm": 0.15819701552391052, "learning_rate": 1.9144650303222233e-05, "loss": 0.5662, "step": 1841 }, { "epoch": 0.466211085801063, "grad_norm": 0.15010058879852295, "learning_rate": 1.914368114210673e-05, "loss": 0.5713, "step": 1842 }, { "epoch": 0.46646418628195396, "grad_norm": 0.15871453285217285, "learning_rate": 1.9142711456803383e-05, "loss": 0.5358, "step": 1843 }, { "epoch": 0.46671728676284485, "grad_norm": 0.1482570320367813, "learning_rate": 1.9141741247367785e-05, "loss": 0.5361, "step": 1844 }, { "epoch": 0.46697038724373574, "grad_norm": 0.1534900814294815, "learning_rate": 1.9140770513855552e-05, "loss": 0.535, "step": 1845 }, { "epoch": 0.4672234877246267, "grad_norm": 0.1464172750711441, "learning_rate": 1.9139799256322334e-05, "loss": 0.5098, "step": 1846 }, { "epoch": 0.4674765882055176, "grad_norm": 0.148983895778656, "learning_rate": 1.913882747482381e-05, "loss": 0.5457, "step": 1847 }, { "epoch": 0.4677296886864085, "grad_norm": 0.15446853637695312, "learning_rate": 1.91378551694157e-05, "loss": 0.5631, "step": 1848 }, { "epoch": 0.4679827891672994, "grad_norm": 0.14498282968997955, "learning_rate": 1.913688234015373e-05, "loss": 0.5376, "step": 1849 }, { "epoch": 0.46823588964819035, "grad_norm": 0.14600622653961182, "learning_rate": 1.913590898709368e-05, "loss": 0.5287, "step": 1850 }, { "epoch": 0.46848899012908124, "grad_norm": 0.14623919129371643, "learning_rate": 1.9134935110291345e-05, "loss": 0.5239, "step": 1851 }, { "epoch": 0.4687420906099722, "grad_norm": 0.15560786426067352, "learning_rate": 1.913396070980256e-05, "loss": 0.5311, "step": 1852 }, { "epoch": 0.4689951910908631, "grad_norm": 0.17758597433567047, "learning_rate": 1.913298578568318e-05, "loss": 0.5569, "step": 1853 }, { "epoch": 0.46924829157175396, "grad_norm": 0.14047466218471527, "learning_rate": 1.91320103379891e-05, "loss": 0.485, "step": 1854 }, { "epoch": 0.4695013920526449, "grad_norm": 0.1628057211637497, "learning_rate": 1.913103436677624e-05, "loss": 0.5464, "step": 1855 }, { "epoch": 0.4697544925335358, "grad_norm": 0.15403884649276733, "learning_rate": 1.9130057872100546e-05, "loss": 0.5315, "step": 1856 }, { "epoch": 0.47000759301442674, "grad_norm": 0.14413578808307648, "learning_rate": 1.9129080854018003e-05, "loss": 0.5402, "step": 1857 }, { "epoch": 0.4702606934953176, "grad_norm": 0.14322863519191742, "learning_rate": 1.912810331258462e-05, "loss": 0.5062, "step": 1858 }, { "epoch": 0.47051379397620857, "grad_norm": 0.15314650535583496, "learning_rate": 1.9127125247856437e-05, "loss": 0.5432, "step": 1859 }, { "epoch": 0.47076689445709946, "grad_norm": 0.13958516716957092, "learning_rate": 1.912614665988952e-05, "loss": 0.5348, "step": 1860 }, { "epoch": 0.4710199949379904, "grad_norm": 0.1479014754295349, "learning_rate": 1.912516754873998e-05, "loss": 0.5773, "step": 1861 }, { "epoch": 0.4712730954188813, "grad_norm": 0.1426335573196411, "learning_rate": 1.9124187914463936e-05, "loss": 0.5176, "step": 1862 }, { "epoch": 0.4715261958997722, "grad_norm": 0.14238254725933075, "learning_rate": 1.9123207757117557e-05, "loss": 0.5656, "step": 1863 }, { "epoch": 0.4717792963806631, "grad_norm": 0.14684829115867615, "learning_rate": 1.912222707675703e-05, "loss": 0.5471, "step": 1864 }, { "epoch": 0.472032396861554, "grad_norm": 0.15091629326343536, "learning_rate": 1.912124587343857e-05, "loss": 0.5219, "step": 1865 }, { "epoch": 0.47228549734244496, "grad_norm": 0.14322194457054138, "learning_rate": 1.9120264147218438e-05, "loss": 0.5373, "step": 1866 }, { "epoch": 0.47253859782333585, "grad_norm": 0.15718382596969604, "learning_rate": 1.9119281898152907e-05, "loss": 0.5386, "step": 1867 }, { "epoch": 0.4727916983042268, "grad_norm": 0.15968430042266846, "learning_rate": 1.911829912629829e-05, "loss": 0.5569, "step": 1868 }, { "epoch": 0.4730447987851177, "grad_norm": 0.14726126194000244, "learning_rate": 1.9117315831710926e-05, "loss": 0.5441, "step": 1869 }, { "epoch": 0.4732978992660086, "grad_norm": 0.14613080024719238, "learning_rate": 1.9116332014447187e-05, "loss": 0.5269, "step": 1870 }, { "epoch": 0.4735509997468995, "grad_norm": 0.1488690972328186, "learning_rate": 1.9115347674563468e-05, "loss": 0.5511, "step": 1871 }, { "epoch": 0.47380410022779046, "grad_norm": 0.14886198937892914, "learning_rate": 1.9114362812116203e-05, "loss": 0.5406, "step": 1872 }, { "epoch": 0.47405720070868135, "grad_norm": 0.14333635568618774, "learning_rate": 1.9113377427161852e-05, "loss": 0.5205, "step": 1873 }, { "epoch": 0.47431030118957224, "grad_norm": 0.148750901222229, "learning_rate": 1.9112391519756905e-05, "loss": 0.5591, "step": 1874 }, { "epoch": 0.4745634016704632, "grad_norm": 0.15048296749591827, "learning_rate": 1.9111405089957886e-05, "loss": 0.535, "step": 1875 }, { "epoch": 0.47481650215135407, "grad_norm": 0.15181617438793182, "learning_rate": 1.9110418137821332e-05, "loss": 0.5396, "step": 1876 }, { "epoch": 0.475069602632245, "grad_norm": 0.15228953957557678, "learning_rate": 1.910943066340384e-05, "loss": 0.5433, "step": 1877 }, { "epoch": 0.4753227031131359, "grad_norm": 0.14914223551750183, "learning_rate": 1.9108442666762008e-05, "loss": 0.5206, "step": 1878 }, { "epoch": 0.47557580359402685, "grad_norm": 0.14637982845306396, "learning_rate": 1.910745414795248e-05, "loss": 0.5691, "step": 1879 }, { "epoch": 0.47582890407491774, "grad_norm": 0.14993123710155487, "learning_rate": 1.9106465107031922e-05, "loss": 0.5713, "step": 1880 }, { "epoch": 0.4760820045558087, "grad_norm": 0.14913460612297058, "learning_rate": 1.9105475544057038e-05, "loss": 0.5671, "step": 1881 }, { "epoch": 0.47633510503669957, "grad_norm": 0.16538570821285248, "learning_rate": 1.9104485459084555e-05, "loss": 0.5331, "step": 1882 }, { "epoch": 0.47658820551759046, "grad_norm": 0.15010684728622437, "learning_rate": 1.9103494852171237e-05, "loss": 0.5802, "step": 1883 }, { "epoch": 0.4768413059984814, "grad_norm": 0.15425536036491394, "learning_rate": 1.910250372337387e-05, "loss": 0.5355, "step": 1884 }, { "epoch": 0.4770944064793723, "grad_norm": 0.1477648764848709, "learning_rate": 1.9101512072749267e-05, "loss": 0.513, "step": 1885 }, { "epoch": 0.47734750696026323, "grad_norm": 0.1467532515525818, "learning_rate": 1.9100519900354288e-05, "loss": 0.549, "step": 1886 }, { "epoch": 0.4776006074411541, "grad_norm": 0.15148386359214783, "learning_rate": 1.9099527206245807e-05, "loss": 0.5582, "step": 1887 }, { "epoch": 0.47785370792204507, "grad_norm": 0.14593085646629333, "learning_rate": 1.9098533990480735e-05, "loss": 0.5277, "step": 1888 }, { "epoch": 0.47810680840293596, "grad_norm": 0.14520245790481567, "learning_rate": 1.9097540253116007e-05, "loss": 0.5274, "step": 1889 }, { "epoch": 0.4783599088838269, "grad_norm": 0.14359933137893677, "learning_rate": 1.9096545994208594e-05, "loss": 0.5226, "step": 1890 }, { "epoch": 0.4786130093647178, "grad_norm": 0.1448543220758438, "learning_rate": 1.9095551213815497e-05, "loss": 0.5339, "step": 1891 }, { "epoch": 0.47886610984560873, "grad_norm": 0.14873534440994263, "learning_rate": 1.9094555911993742e-05, "loss": 0.547, "step": 1892 }, { "epoch": 0.4791192103264996, "grad_norm": 0.14431843161582947, "learning_rate": 1.909356008880039e-05, "loss": 0.5374, "step": 1893 }, { "epoch": 0.4793723108073905, "grad_norm": 0.15551522374153137, "learning_rate": 1.9092563744292526e-05, "loss": 0.5421, "step": 1894 }, { "epoch": 0.47962541128828146, "grad_norm": 0.14870351552963257, "learning_rate": 1.909156687852727e-05, "loss": 0.5532, "step": 1895 }, { "epoch": 0.47987851176917234, "grad_norm": 0.14925895631313324, "learning_rate": 1.909056949156177e-05, "loss": 0.5419, "step": 1896 }, { "epoch": 0.4801316122500633, "grad_norm": 0.1646534502506256, "learning_rate": 1.9089571583453206e-05, "loss": 0.5399, "step": 1897 }, { "epoch": 0.4803847127309542, "grad_norm": 0.14854730665683746, "learning_rate": 1.9088573154258782e-05, "loss": 0.5431, "step": 1898 }, { "epoch": 0.4806378132118451, "grad_norm": 0.14499633014202118, "learning_rate": 1.9087574204035738e-05, "loss": 0.5554, "step": 1899 }, { "epoch": 0.480890913692736, "grad_norm": 0.14545610547065735, "learning_rate": 1.9086574732841346e-05, "loss": 0.5275, "step": 1900 }, { "epoch": 0.48114401417362695, "grad_norm": 0.14720550179481506, "learning_rate": 1.90855747407329e-05, "loss": 0.5711, "step": 1901 }, { "epoch": 0.48139711465451784, "grad_norm": 0.15526871383190155, "learning_rate": 1.908457422776772e-05, "loss": 0.5352, "step": 1902 }, { "epoch": 0.48165021513540873, "grad_norm": 0.1483507752418518, "learning_rate": 1.9083573194003173e-05, "loss": 0.5635, "step": 1903 }, { "epoch": 0.4819033156162997, "grad_norm": 0.14749382436275482, "learning_rate": 1.9082571639496644e-05, "loss": 0.5267, "step": 1904 }, { "epoch": 0.48215641609719057, "grad_norm": 0.1496928334236145, "learning_rate": 1.908156956430555e-05, "loss": 0.5527, "step": 1905 }, { "epoch": 0.4824095165780815, "grad_norm": 0.15124240517616272, "learning_rate": 1.9080566968487337e-05, "loss": 0.553, "step": 1906 }, { "epoch": 0.4826626170589724, "grad_norm": 0.14676426351070404, "learning_rate": 1.907956385209948e-05, "loss": 0.5288, "step": 1907 }, { "epoch": 0.48291571753986334, "grad_norm": 0.1487855762243271, "learning_rate": 1.907856021519949e-05, "loss": 0.5259, "step": 1908 }, { "epoch": 0.48316881802075423, "grad_norm": 0.14760038256645203, "learning_rate": 1.9077556057844898e-05, "loss": 0.5174, "step": 1909 }, { "epoch": 0.4834219185016452, "grad_norm": 0.1448255032300949, "learning_rate": 1.9076551380093274e-05, "loss": 0.5207, "step": 1910 }, { "epoch": 0.48367501898253606, "grad_norm": 0.15068931877613068, "learning_rate": 1.9075546182002213e-05, "loss": 0.5478, "step": 1911 }, { "epoch": 0.48392811946342695, "grad_norm": 0.14817774295806885, "learning_rate": 1.907454046362934e-05, "loss": 0.5553, "step": 1912 }, { "epoch": 0.4841812199443179, "grad_norm": 0.14044204354286194, "learning_rate": 1.9073534225032313e-05, "loss": 0.5021, "step": 1913 }, { "epoch": 0.4844343204252088, "grad_norm": 0.14910194277763367, "learning_rate": 1.9072527466268816e-05, "loss": 0.5579, "step": 1914 }, { "epoch": 0.48468742090609973, "grad_norm": 0.14622847735881805, "learning_rate": 1.9071520187396563e-05, "loss": 0.5489, "step": 1915 }, { "epoch": 0.4849405213869906, "grad_norm": 0.14573965966701508, "learning_rate": 1.90705123884733e-05, "loss": 0.5139, "step": 1916 }, { "epoch": 0.48519362186788156, "grad_norm": 0.1451754868030548, "learning_rate": 1.9069504069556806e-05, "loss": 0.5397, "step": 1917 }, { "epoch": 0.48544672234877245, "grad_norm": 0.14228999614715576, "learning_rate": 1.9068495230704878e-05, "loss": 0.5257, "step": 1918 }, { "epoch": 0.4856998228296634, "grad_norm": 0.14980851113796234, "learning_rate": 1.9067485871975356e-05, "loss": 0.565, "step": 1919 }, { "epoch": 0.4859529233105543, "grad_norm": 0.14844326674938202, "learning_rate": 1.9066475993426105e-05, "loss": 0.554, "step": 1920 }, { "epoch": 0.48620602379144523, "grad_norm": 0.14422109723091125, "learning_rate": 1.9065465595115018e-05, "loss": 0.5296, "step": 1921 }, { "epoch": 0.4864591242723361, "grad_norm": 0.14998356997966766, "learning_rate": 1.9064454677100016e-05, "loss": 0.5451, "step": 1922 }, { "epoch": 0.486712224753227, "grad_norm": 0.14755471050739288, "learning_rate": 1.906344323943906e-05, "loss": 0.5637, "step": 1923 }, { "epoch": 0.48696532523411795, "grad_norm": 0.1462131142616272, "learning_rate": 1.9062431282190125e-05, "loss": 0.5349, "step": 1924 }, { "epoch": 0.48721842571500884, "grad_norm": 0.14705601334571838, "learning_rate": 1.9061418805411228e-05, "loss": 0.5413, "step": 1925 }, { "epoch": 0.4874715261958998, "grad_norm": 0.1487126648426056, "learning_rate": 1.9060405809160412e-05, "loss": 0.5382, "step": 1926 }, { "epoch": 0.4877246266767907, "grad_norm": 0.14743807911872864, "learning_rate": 1.905939229349575e-05, "loss": 0.5381, "step": 1927 }, { "epoch": 0.4879777271576816, "grad_norm": 0.1568838655948639, "learning_rate": 1.9058378258475346e-05, "loss": 0.5607, "step": 1928 }, { "epoch": 0.4882308276385725, "grad_norm": 0.1473371833562851, "learning_rate": 1.9057363704157334e-05, "loss": 0.5569, "step": 1929 }, { "epoch": 0.48848392811946345, "grad_norm": 0.14230087399482727, "learning_rate": 1.905634863059987e-05, "loss": 0.5394, "step": 1930 }, { "epoch": 0.48873702860035434, "grad_norm": 0.21522068977355957, "learning_rate": 1.9055333037861154e-05, "loss": 0.5066, "step": 1931 }, { "epoch": 0.48899012908124523, "grad_norm": 0.15192356705665588, "learning_rate": 1.9054316925999398e-05, "loss": 0.5596, "step": 1932 }, { "epoch": 0.4892432295621362, "grad_norm": 0.1506957709789276, "learning_rate": 1.9053300295072864e-05, "loss": 0.5596, "step": 1933 }, { "epoch": 0.48949633004302706, "grad_norm": 0.14356692135334015, "learning_rate": 1.9052283145139825e-05, "loss": 0.5294, "step": 1934 }, { "epoch": 0.489749430523918, "grad_norm": 0.14666032791137695, "learning_rate": 1.9051265476258596e-05, "loss": 0.5412, "step": 1935 }, { "epoch": 0.4900025310048089, "grad_norm": 0.14896340668201447, "learning_rate": 1.9050247288487517e-05, "loss": 0.5583, "step": 1936 }, { "epoch": 0.49025563148569984, "grad_norm": 0.14283309876918793, "learning_rate": 1.904922858188496e-05, "loss": 0.5163, "step": 1937 }, { "epoch": 0.4905087319665907, "grad_norm": 0.15178611874580383, "learning_rate": 1.9048209356509323e-05, "loss": 0.5424, "step": 1938 }, { "epoch": 0.49076183244748167, "grad_norm": 0.1689726561307907, "learning_rate": 1.9047189612419037e-05, "loss": 0.5351, "step": 1939 }, { "epoch": 0.49101493292837256, "grad_norm": 0.1439141482114792, "learning_rate": 1.9046169349672564e-05, "loss": 0.5146, "step": 1940 }, { "epoch": 0.4912680334092635, "grad_norm": 0.1454126238822937, "learning_rate": 1.904514856832839e-05, "loss": 0.5702, "step": 1941 }, { "epoch": 0.4915211338901544, "grad_norm": 0.14274144172668457, "learning_rate": 1.9044127268445033e-05, "loss": 0.52, "step": 1942 }, { "epoch": 0.4917742343710453, "grad_norm": 0.14609690010547638, "learning_rate": 1.9043105450081047e-05, "loss": 0.5065, "step": 1943 }, { "epoch": 0.4920273348519362, "grad_norm": 0.15331365168094635, "learning_rate": 1.9042083113295006e-05, "loss": 0.55, "step": 1944 }, { "epoch": 0.4922804353328271, "grad_norm": 0.15041755139827728, "learning_rate": 1.9041060258145525e-05, "loss": 0.5474, "step": 1945 }, { "epoch": 0.49253353581371806, "grad_norm": 0.14926937222480774, "learning_rate": 1.9040036884691235e-05, "loss": 0.56, "step": 1946 }, { "epoch": 0.49278663629460895, "grad_norm": 0.15952521562576294, "learning_rate": 1.9039012992990806e-05, "loss": 0.5525, "step": 1947 }, { "epoch": 0.4930397367754999, "grad_norm": 0.14475597441196442, "learning_rate": 1.9037988583102936e-05, "loss": 0.5547, "step": 1948 }, { "epoch": 0.4932928372563908, "grad_norm": 0.1460406631231308, "learning_rate": 1.9036963655086353e-05, "loss": 0.5205, "step": 1949 }, { "epoch": 0.4935459377372817, "grad_norm": 0.14597095549106598, "learning_rate": 1.9035938208999812e-05, "loss": 0.5318, "step": 1950 }, { "epoch": 0.4937990382181726, "grad_norm": 0.1466267853975296, "learning_rate": 1.9034912244902103e-05, "loss": 0.5391, "step": 1951 }, { "epoch": 0.4940521386990635, "grad_norm": 0.1474587321281433, "learning_rate": 1.903388576285204e-05, "loss": 0.5297, "step": 1952 }, { "epoch": 0.49430523917995445, "grad_norm": 0.1442517638206482, "learning_rate": 1.9032858762908468e-05, "loss": 0.5211, "step": 1953 }, { "epoch": 0.49455833966084534, "grad_norm": 0.14450058341026306, "learning_rate": 1.9031831245130265e-05, "loss": 0.5576, "step": 1954 }, { "epoch": 0.4948114401417363, "grad_norm": 0.15431705117225647, "learning_rate": 1.9030803209576337e-05, "loss": 0.5432, "step": 1955 }, { "epoch": 0.49506454062262717, "grad_norm": 0.1450243592262268, "learning_rate": 1.9029774656305614e-05, "loss": 0.5456, "step": 1956 }, { "epoch": 0.4953176411035181, "grad_norm": 0.1464066505432129, "learning_rate": 1.9028745585377066e-05, "loss": 0.5454, "step": 1957 }, { "epoch": 0.495570741584409, "grad_norm": 0.14703373610973358, "learning_rate": 1.9027715996849686e-05, "loss": 0.5622, "step": 1958 }, { "epoch": 0.49582384206529995, "grad_norm": 0.14474087953567505, "learning_rate": 1.9026685890782498e-05, "loss": 0.5169, "step": 1959 }, { "epoch": 0.49607694254619084, "grad_norm": 0.14639674127101898, "learning_rate": 1.902565526723455e-05, "loss": 0.5581, "step": 1960 }, { "epoch": 0.4963300430270817, "grad_norm": 0.15299645066261292, "learning_rate": 1.9024624126264938e-05, "loss": 0.5219, "step": 1961 }, { "epoch": 0.49658314350797267, "grad_norm": 0.14949935674667358, "learning_rate": 1.9023592467932767e-05, "loss": 0.5601, "step": 1962 }, { "epoch": 0.49683624398886356, "grad_norm": 0.14503586292266846, "learning_rate": 1.9022560292297183e-05, "loss": 0.544, "step": 1963 }, { "epoch": 0.4970893444697545, "grad_norm": 0.17673051357269287, "learning_rate": 1.9021527599417354e-05, "loss": 0.5612, "step": 1964 }, { "epoch": 0.4973424449506454, "grad_norm": 0.14651919901371002, "learning_rate": 1.9020494389352483e-05, "loss": 0.5746, "step": 1965 }, { "epoch": 0.49759554543153633, "grad_norm": 0.15588942170143127, "learning_rate": 1.9019460662161807e-05, "loss": 0.5579, "step": 1966 }, { "epoch": 0.4978486459124272, "grad_norm": 0.1492077112197876, "learning_rate": 1.901842641790458e-05, "loss": 0.5499, "step": 1967 }, { "epoch": 0.49810174639331817, "grad_norm": 0.18058110773563385, "learning_rate": 1.9017391656640097e-05, "loss": 0.5418, "step": 1968 }, { "epoch": 0.49835484687420906, "grad_norm": 0.16760560870170593, "learning_rate": 1.9016356378427683e-05, "loss": 0.5246, "step": 1969 }, { "epoch": 0.4986079473551, "grad_norm": 0.1536208987236023, "learning_rate": 1.901532058332668e-05, "loss": 0.581, "step": 1970 }, { "epoch": 0.4988610478359909, "grad_norm": 0.16915126144886017, "learning_rate": 1.901428427139647e-05, "loss": 0.5336, "step": 1971 }, { "epoch": 0.4991141483168818, "grad_norm": 0.14436998963356018, "learning_rate": 1.901324744269647e-05, "loss": 0.5401, "step": 1972 }, { "epoch": 0.4993672487977727, "grad_norm": 0.14993751049041748, "learning_rate": 1.901221009728611e-05, "loss": 0.5555, "step": 1973 }, { "epoch": 0.4996203492786636, "grad_norm": 0.14491701126098633, "learning_rate": 1.901117223522486e-05, "loss": 0.5338, "step": 1974 }, { "epoch": 0.49987344975955456, "grad_norm": 0.14353638887405396, "learning_rate": 1.9010133856572222e-05, "loss": 0.5344, "step": 1975 }, { "epoch": 0.5001265502404455, "grad_norm": 0.14797140657901764, "learning_rate": 1.9009094961387726e-05, "loss": 0.525, "step": 1976 }, { "epoch": 0.5003796507213364, "grad_norm": 0.14880260825157166, "learning_rate": 1.900805554973092e-05, "loss": 0.5453, "step": 1977 }, { "epoch": 0.5006327512022273, "grad_norm": 0.14741623401641846, "learning_rate": 1.90070156216614e-05, "loss": 0.5341, "step": 1978 }, { "epoch": 0.5008858516831182, "grad_norm": 0.14488853514194489, "learning_rate": 1.9005975177238784e-05, "loss": 0.5299, "step": 1979 }, { "epoch": 0.5011389521640092, "grad_norm": 0.14642424881458282, "learning_rate": 1.9004934216522714e-05, "loss": 0.5569, "step": 1980 }, { "epoch": 0.5013920526449, "grad_norm": 0.14464901387691498, "learning_rate": 1.9003892739572863e-05, "loss": 0.5588, "step": 1981 }, { "epoch": 0.5016451531257909, "grad_norm": 0.147584930062294, "learning_rate": 1.9002850746448947e-05, "loss": 0.5346, "step": 1982 }, { "epoch": 0.5018982536066818, "grad_norm": 0.1435047686100006, "learning_rate": 1.900180823721069e-05, "loss": 0.5466, "step": 1983 }, { "epoch": 0.5021513540875727, "grad_norm": 0.169583261013031, "learning_rate": 1.9000765211917862e-05, "loss": 0.5527, "step": 1984 }, { "epoch": 0.5024044545684637, "grad_norm": 0.15055251121520996, "learning_rate": 1.8999721670630256e-05, "loss": 0.5795, "step": 1985 }, { "epoch": 0.5026575550493546, "grad_norm": 0.15124401450157166, "learning_rate": 1.8998677613407703e-05, "loss": 0.5141, "step": 1986 }, { "epoch": 0.5029106555302455, "grad_norm": 0.14702655375003815, "learning_rate": 1.8997633040310047e-05, "loss": 0.5314, "step": 1987 }, { "epoch": 0.5031637560111364, "grad_norm": 0.1500476449728012, "learning_rate": 1.8996587951397176e-05, "loss": 0.5811, "step": 1988 }, { "epoch": 0.5034168564920274, "grad_norm": 0.14876651763916016, "learning_rate": 1.8995542346729003e-05, "loss": 0.5242, "step": 1989 }, { "epoch": 0.5036699569729183, "grad_norm": 0.14694412052631378, "learning_rate": 1.8994496226365467e-05, "loss": 0.517, "step": 1990 }, { "epoch": 0.5039230574538092, "grad_norm": 0.1451118141412735, "learning_rate": 1.899344959036654e-05, "loss": 0.533, "step": 1991 }, { "epoch": 0.5041761579347, "grad_norm": 0.14241361618041992, "learning_rate": 1.8992402438792225e-05, "loss": 0.5199, "step": 1992 }, { "epoch": 0.5044292584155909, "grad_norm": 0.1467846781015396, "learning_rate": 1.8991354771702557e-05, "loss": 0.5298, "step": 1993 }, { "epoch": 0.5046823588964819, "grad_norm": 0.14562153816223145, "learning_rate": 1.899030658915759e-05, "loss": 0.5373, "step": 1994 }, { "epoch": 0.5049354593773728, "grad_norm": 0.1485190987586975, "learning_rate": 1.8989257891217422e-05, "loss": 0.566, "step": 1995 }, { "epoch": 0.5051885598582637, "grad_norm": 0.14917857944965363, "learning_rate": 1.8988208677942163e-05, "loss": 0.5349, "step": 1996 }, { "epoch": 0.5054416603391546, "grad_norm": 0.1508703976869583, "learning_rate": 1.8987158949391965e-05, "loss": 0.5426, "step": 1997 }, { "epoch": 0.5056947608200456, "grad_norm": 0.14861728250980377, "learning_rate": 1.898610870562701e-05, "loss": 0.5555, "step": 1998 }, { "epoch": 0.5059478613009365, "grad_norm": 0.14600548148155212, "learning_rate": 1.8985057946707507e-05, "loss": 0.5273, "step": 1999 }, { "epoch": 0.5062009617818274, "grad_norm": 0.14683488011360168, "learning_rate": 1.8984006672693688e-05, "loss": 0.5254, "step": 2000 }, { "epoch": 0.5064540622627183, "grad_norm": 0.14343638718128204, "learning_rate": 1.8982954883645826e-05, "loss": 0.5368, "step": 2001 }, { "epoch": 0.5067071627436092, "grad_norm": 0.1483989655971527, "learning_rate": 1.8981902579624216e-05, "loss": 0.5546, "step": 2002 }, { "epoch": 0.5069602632245002, "grad_norm": 0.14976057410240173, "learning_rate": 1.898084976068918e-05, "loss": 0.5373, "step": 2003 }, { "epoch": 0.507213363705391, "grad_norm": 0.14165432751178741, "learning_rate": 1.897979642690108e-05, "loss": 0.5337, "step": 2004 }, { "epoch": 0.5074664641862819, "grad_norm": 0.14637352526187897, "learning_rate": 1.8978742578320303e-05, "loss": 0.5268, "step": 2005 }, { "epoch": 0.5077195646671728, "grad_norm": 0.14603778719902039, "learning_rate": 1.897768821500726e-05, "loss": 0.5407, "step": 2006 }, { "epoch": 0.5079726651480638, "grad_norm": 0.14633269608020782, "learning_rate": 1.897663333702239e-05, "loss": 0.558, "step": 2007 }, { "epoch": 0.5082257656289547, "grad_norm": 0.1476481556892395, "learning_rate": 1.8975577944426177e-05, "loss": 0.5352, "step": 2008 }, { "epoch": 0.5084788661098456, "grad_norm": 0.14400698244571686, "learning_rate": 1.8974522037279116e-05, "loss": 0.5296, "step": 2009 }, { "epoch": 0.5087319665907365, "grad_norm": 0.14734113216400146, "learning_rate": 1.8973465615641745e-05, "loss": 0.5487, "step": 2010 }, { "epoch": 0.5089850670716274, "grad_norm": 0.14925172924995422, "learning_rate": 1.8972408679574625e-05, "loss": 0.5543, "step": 2011 }, { "epoch": 0.5092381675525184, "grad_norm": 0.14813505113124847, "learning_rate": 1.897135122913835e-05, "loss": 0.52, "step": 2012 }, { "epoch": 0.5094912680334093, "grad_norm": 0.14566028118133545, "learning_rate": 1.8970293264393536e-05, "loss": 0.5769, "step": 2013 }, { "epoch": 0.5097443685143002, "grad_norm": 0.14642831683158875, "learning_rate": 1.896923478540084e-05, "loss": 0.5491, "step": 2014 }, { "epoch": 0.509997468995191, "grad_norm": 0.1512906402349472, "learning_rate": 1.896817579222094e-05, "loss": 0.5539, "step": 2015 }, { "epoch": 0.510250569476082, "grad_norm": 0.15196779370307922, "learning_rate": 1.8967116284914545e-05, "loss": 0.5313, "step": 2016 }, { "epoch": 0.5105036699569729, "grad_norm": 0.1474650502204895, "learning_rate": 1.8966056263542393e-05, "loss": 0.5331, "step": 2017 }, { "epoch": 0.5107567704378638, "grad_norm": 0.15592193603515625, "learning_rate": 1.8964995728165255e-05, "loss": 0.5485, "step": 2018 }, { "epoch": 0.5110098709187547, "grad_norm": 0.1451408714056015, "learning_rate": 1.8963934678843933e-05, "loss": 0.543, "step": 2019 }, { "epoch": 0.5112629713996456, "grad_norm": 0.14020602405071259, "learning_rate": 1.8962873115639246e-05, "loss": 0.5583, "step": 2020 }, { "epoch": 0.5115160718805366, "grad_norm": 0.14597153663635254, "learning_rate": 1.8961811038612057e-05, "loss": 0.5399, "step": 2021 }, { "epoch": 0.5117691723614275, "grad_norm": 0.17113880813121796, "learning_rate": 1.896074844782325e-05, "loss": 0.5566, "step": 2022 }, { "epoch": 0.5120222728423184, "grad_norm": 0.15941676497459412, "learning_rate": 1.8959685343333746e-05, "loss": 0.5279, "step": 2023 }, { "epoch": 0.5122753733232093, "grad_norm": 0.13685846328735352, "learning_rate": 1.8958621725204486e-05, "loss": 0.5301, "step": 2024 }, { "epoch": 0.5125284738041003, "grad_norm": 0.14705871045589447, "learning_rate": 1.8957557593496442e-05, "loss": 0.5471, "step": 2025 }, { "epoch": 0.5127815742849912, "grad_norm": 0.1425859034061432, "learning_rate": 1.8956492948270625e-05, "loss": 0.525, "step": 2026 }, { "epoch": 0.513034674765882, "grad_norm": 0.16177019476890564, "learning_rate": 1.895542778958807e-05, "loss": 0.5966, "step": 2027 }, { "epoch": 0.5132877752467729, "grad_norm": 0.15402144193649292, "learning_rate": 1.895436211750983e-05, "loss": 0.5515, "step": 2028 }, { "epoch": 0.5135408757276639, "grad_norm": 0.14698517322540283, "learning_rate": 1.8953295932097007e-05, "loss": 0.5314, "step": 2029 }, { "epoch": 0.5137939762085548, "grad_norm": 0.15535616874694824, "learning_rate": 1.895222923341072e-05, "loss": 0.5531, "step": 2030 }, { "epoch": 0.5140470766894457, "grad_norm": 0.15035369992256165, "learning_rate": 1.895116202151212e-05, "loss": 0.5351, "step": 2031 }, { "epoch": 0.5143001771703366, "grad_norm": 0.14429210126399994, "learning_rate": 1.8950094296462392e-05, "loss": 0.5495, "step": 2032 }, { "epoch": 0.5145532776512275, "grad_norm": 0.14750072360038757, "learning_rate": 1.894902605832274e-05, "loss": 0.55, "step": 2033 }, { "epoch": 0.5148063781321185, "grad_norm": 0.15020400285720825, "learning_rate": 1.894795730715441e-05, "loss": 0.5516, "step": 2034 }, { "epoch": 0.5150594786130094, "grad_norm": 0.14803938567638397, "learning_rate": 1.8946888043018666e-05, "loss": 0.5431, "step": 2035 }, { "epoch": 0.5153125790939003, "grad_norm": 0.1507357358932495, "learning_rate": 1.894581826597681e-05, "loss": 0.5701, "step": 2036 }, { "epoch": 0.5155656795747912, "grad_norm": 0.1515742987394333, "learning_rate": 1.894474797609017e-05, "loss": 0.5624, "step": 2037 }, { "epoch": 0.5158187800556822, "grad_norm": 0.1476578265428543, "learning_rate": 1.89436771734201e-05, "loss": 0.5491, "step": 2038 }, { "epoch": 0.516071880536573, "grad_norm": 0.14802879095077515, "learning_rate": 1.8942605858027988e-05, "loss": 0.5459, "step": 2039 }, { "epoch": 0.5163249810174639, "grad_norm": 0.1512601375579834, "learning_rate": 1.8941534029975257e-05, "loss": 0.5526, "step": 2040 }, { "epoch": 0.5165780814983548, "grad_norm": 0.1454947143793106, "learning_rate": 1.8940461689323343e-05, "loss": 0.5341, "step": 2041 }, { "epoch": 0.5168311819792457, "grad_norm": 0.1484716832637787, "learning_rate": 1.8939388836133728e-05, "loss": 0.5406, "step": 2042 }, { "epoch": 0.5170842824601367, "grad_norm": 0.14195962250232697, "learning_rate": 1.8938315470467912e-05, "loss": 0.5365, "step": 2043 }, { "epoch": 0.5173373829410276, "grad_norm": 0.14998741447925568, "learning_rate": 1.893724159238743e-05, "loss": 0.5313, "step": 2044 }, { "epoch": 0.5175904834219185, "grad_norm": 0.14848732948303223, "learning_rate": 1.893616720195385e-05, "loss": 0.5479, "step": 2045 }, { "epoch": 0.5178435839028094, "grad_norm": 0.15063674747943878, "learning_rate": 1.8935092299228753e-05, "loss": 0.58, "step": 2046 }, { "epoch": 0.5180966843837004, "grad_norm": 0.14607171714305878, "learning_rate": 1.8934016884273773e-05, "loss": 0.5336, "step": 2047 }, { "epoch": 0.5183497848645913, "grad_norm": 0.14624135196208954, "learning_rate": 1.8932940957150557e-05, "loss": 0.5441, "step": 2048 }, { "epoch": 0.5186028853454822, "grad_norm": 0.1554187834262848, "learning_rate": 1.893186451792078e-05, "loss": 0.5666, "step": 2049 }, { "epoch": 0.518855985826373, "grad_norm": 0.14865043759346008, "learning_rate": 1.893078756664616e-05, "loss": 0.5323, "step": 2050 }, { "epoch": 0.5191090863072639, "grad_norm": 0.14657746255397797, "learning_rate": 1.8929710103388435e-05, "loss": 0.5371, "step": 2051 }, { "epoch": 0.5193621867881549, "grad_norm": 0.15419794619083405, "learning_rate": 1.8928632128209368e-05, "loss": 0.5338, "step": 2052 }, { "epoch": 0.5196152872690458, "grad_norm": 0.14877189695835114, "learning_rate": 1.8927553641170764e-05, "loss": 0.5763, "step": 2053 }, { "epoch": 0.5198683877499367, "grad_norm": 0.15488377213478088, "learning_rate": 1.8926474642334443e-05, "loss": 0.5632, "step": 2054 }, { "epoch": 0.5201214882308276, "grad_norm": 0.14712969958782196, "learning_rate": 1.892539513176227e-05, "loss": 0.5599, "step": 2055 }, { "epoch": 0.5203745887117186, "grad_norm": 0.14861759543418884, "learning_rate": 1.8924315109516124e-05, "loss": 0.525, "step": 2056 }, { "epoch": 0.5206276891926095, "grad_norm": 0.1469864696264267, "learning_rate": 1.8923234575657928e-05, "loss": 0.56, "step": 2057 }, { "epoch": 0.5208807896735004, "grad_norm": 0.14894641935825348, "learning_rate": 1.892215353024962e-05, "loss": 0.5274, "step": 2058 }, { "epoch": 0.5211338901543913, "grad_norm": 0.15288713574409485, "learning_rate": 1.8921071973353174e-05, "loss": 0.551, "step": 2059 }, { "epoch": 0.5213869906352822, "grad_norm": 0.14344966411590576, "learning_rate": 1.89199899050306e-05, "loss": 0.533, "step": 2060 }, { "epoch": 0.5216400911161732, "grad_norm": 0.1435471773147583, "learning_rate": 1.891890732534392e-05, "loss": 0.5379, "step": 2061 }, { "epoch": 0.521893191597064, "grad_norm": 0.14533542096614838, "learning_rate": 1.8917824234355205e-05, "loss": 0.5225, "step": 2062 }, { "epoch": 0.5221462920779549, "grad_norm": 0.14329683780670166, "learning_rate": 1.8916740632126544e-05, "loss": 0.5398, "step": 2063 }, { "epoch": 0.5223993925588458, "grad_norm": 0.14607039093971252, "learning_rate": 1.891565651872006e-05, "loss": 0.5059, "step": 2064 }, { "epoch": 0.5226524930397368, "grad_norm": 0.14713475108146667, "learning_rate": 1.8914571894197894e-05, "loss": 0.5527, "step": 2065 }, { "epoch": 0.5229055935206277, "grad_norm": 0.1503877490758896, "learning_rate": 1.8913486758622236e-05, "loss": 0.5449, "step": 2066 }, { "epoch": 0.5231586940015186, "grad_norm": 0.1482800990343094, "learning_rate": 1.8912401112055288e-05, "loss": 0.5663, "step": 2067 }, { "epoch": 0.5234117944824095, "grad_norm": 0.14930813014507294, "learning_rate": 1.891131495455929e-05, "loss": 0.5481, "step": 2068 }, { "epoch": 0.5236648949633004, "grad_norm": 0.14039750397205353, "learning_rate": 1.891022828619651e-05, "loss": 0.517, "step": 2069 }, { "epoch": 0.5239179954441914, "grad_norm": 0.1495741307735443, "learning_rate": 1.8909141107029237e-05, "loss": 0.5328, "step": 2070 }, { "epoch": 0.5241710959250823, "grad_norm": 0.14653369784355164, "learning_rate": 1.890805341711981e-05, "loss": 0.5524, "step": 2071 }, { "epoch": 0.5244241964059732, "grad_norm": 0.15061494708061218, "learning_rate": 1.8906965216530574e-05, "loss": 0.564, "step": 2072 }, { "epoch": 0.524677296886864, "grad_norm": 0.1590503454208374, "learning_rate": 1.8905876505323918e-05, "loss": 0.5553, "step": 2073 }, { "epoch": 0.524930397367755, "grad_norm": 0.19967687129974365, "learning_rate": 1.890478728356225e-05, "loss": 0.557, "step": 2074 }, { "epoch": 0.5251834978486459, "grad_norm": 0.14689359068870544, "learning_rate": 1.8903697551308018e-05, "loss": 0.5203, "step": 2075 }, { "epoch": 0.5254365983295368, "grad_norm": 0.14794990420341492, "learning_rate": 1.890260730862369e-05, "loss": 0.5395, "step": 2076 }, { "epoch": 0.5256896988104277, "grad_norm": 0.14844904839992523, "learning_rate": 1.8901516555571772e-05, "loss": 0.5522, "step": 2077 }, { "epoch": 0.5259427992913187, "grad_norm": 0.14163947105407715, "learning_rate": 1.8900425292214794e-05, "loss": 0.5318, "step": 2078 }, { "epoch": 0.5261958997722096, "grad_norm": 0.15207208693027496, "learning_rate": 1.889933351861531e-05, "loss": 0.5234, "step": 2079 }, { "epoch": 0.5264490002531005, "grad_norm": 0.14353814721107483, "learning_rate": 1.8898241234835914e-05, "loss": 0.5376, "step": 2080 }, { "epoch": 0.5267021007339914, "grad_norm": 0.14712877571582794, "learning_rate": 1.8897148440939227e-05, "loss": 0.5412, "step": 2081 }, { "epoch": 0.5269552012148823, "grad_norm": 0.1465034931898117, "learning_rate": 1.889605513698789e-05, "loss": 0.5296, "step": 2082 }, { "epoch": 0.5272083016957733, "grad_norm": 0.14930671453475952, "learning_rate": 1.8894961323044584e-05, "loss": 0.5479, "step": 2083 }, { "epoch": 0.5274614021766642, "grad_norm": 0.14586983621120453, "learning_rate": 1.889386699917201e-05, "loss": 0.5548, "step": 2084 }, { "epoch": 0.527714502657555, "grad_norm": 0.14690659940242767, "learning_rate": 1.889277216543291e-05, "loss": 0.5351, "step": 2085 }, { "epoch": 0.5279676031384459, "grad_norm": 0.1415756791830063, "learning_rate": 1.8891676821890047e-05, "loss": 0.5311, "step": 2086 }, { "epoch": 0.5282207036193369, "grad_norm": 0.14690400660037994, "learning_rate": 1.8890580968606208e-05, "loss": 0.5541, "step": 2087 }, { "epoch": 0.5284738041002278, "grad_norm": 0.14690932631492615, "learning_rate": 1.8889484605644226e-05, "loss": 0.545, "step": 2088 }, { "epoch": 0.5287269045811187, "grad_norm": 0.13867908716201782, "learning_rate": 1.8888387733066946e-05, "loss": 0.5583, "step": 2089 }, { "epoch": 0.5289800050620096, "grad_norm": 0.14592419564723969, "learning_rate": 1.8887290350937253e-05, "loss": 0.5415, "step": 2090 }, { "epoch": 0.5292331055429005, "grad_norm": 0.14960640668869019, "learning_rate": 1.8886192459318056e-05, "loss": 0.554, "step": 2091 }, { "epoch": 0.5294862060237915, "grad_norm": 0.148869588971138, "learning_rate": 1.8885094058272296e-05, "loss": 0.5576, "step": 2092 }, { "epoch": 0.5297393065046824, "grad_norm": 0.14470264315605164, "learning_rate": 1.888399514786294e-05, "loss": 0.5394, "step": 2093 }, { "epoch": 0.5299924069855733, "grad_norm": 0.1456449329853058, "learning_rate": 1.8882895728152986e-05, "loss": 0.5323, "step": 2094 }, { "epoch": 0.5302455074664641, "grad_norm": 0.14392712712287903, "learning_rate": 1.8881795799205465e-05, "loss": 0.5256, "step": 2095 }, { "epoch": 0.5304986079473551, "grad_norm": 0.14633582532405853, "learning_rate": 1.888069536108343e-05, "loss": 0.536, "step": 2096 }, { "epoch": 0.530751708428246, "grad_norm": 0.1577252596616745, "learning_rate": 1.8879594413849966e-05, "loss": 0.5289, "step": 2097 }, { "epoch": 0.5310048089091369, "grad_norm": 0.14655692875385284, "learning_rate": 1.8878492957568193e-05, "loss": 0.5507, "step": 2098 }, { "epoch": 0.5312579093900278, "grad_norm": 0.14495889842510223, "learning_rate": 1.8877390992301252e-05, "loss": 0.5758, "step": 2099 }, { "epoch": 0.5315110098709187, "grad_norm": 0.16211102902889252, "learning_rate": 1.8876288518112315e-05, "loss": 0.5455, "step": 2100 }, { "epoch": 0.5317641103518097, "grad_norm": 0.1435222029685974, "learning_rate": 1.8875185535064588e-05, "loss": 0.5419, "step": 2101 }, { "epoch": 0.5320172108327006, "grad_norm": 0.14342010021209717, "learning_rate": 1.88740820432213e-05, "loss": 0.5603, "step": 2102 }, { "epoch": 0.5322703113135915, "grad_norm": 0.14504124224185944, "learning_rate": 1.8872978042645714e-05, "loss": 0.5188, "step": 2103 }, { "epoch": 0.5325234117944824, "grad_norm": 0.1456257551908493, "learning_rate": 1.8871873533401113e-05, "loss": 0.5403, "step": 2104 }, { "epoch": 0.5327765122753734, "grad_norm": 0.16915258765220642, "learning_rate": 1.8870768515550826e-05, "loss": 0.5291, "step": 2105 }, { "epoch": 0.5330296127562643, "grad_norm": 0.14916908740997314, "learning_rate": 1.8869662989158197e-05, "loss": 0.5471, "step": 2106 }, { "epoch": 0.5332827132371551, "grad_norm": 0.15560995042324066, "learning_rate": 1.8868556954286604e-05, "loss": 0.5356, "step": 2107 }, { "epoch": 0.533535813718046, "grad_norm": 0.1501181274652481, "learning_rate": 1.8867450410999453e-05, "loss": 0.5277, "step": 2108 }, { "epoch": 0.5337889141989369, "grad_norm": 0.1498737931251526, "learning_rate": 1.886634335936018e-05, "loss": 0.5623, "step": 2109 }, { "epoch": 0.5340420146798279, "grad_norm": 0.142748162150383, "learning_rate": 1.8865235799432246e-05, "loss": 0.5373, "step": 2110 }, { "epoch": 0.5342951151607188, "grad_norm": 0.1436242014169693, "learning_rate": 1.886412773127915e-05, "loss": 0.5397, "step": 2111 }, { "epoch": 0.5345482156416097, "grad_norm": 0.1510160118341446, "learning_rate": 1.8863019154964418e-05, "loss": 0.5333, "step": 2112 }, { "epoch": 0.5348013161225006, "grad_norm": 0.14627863466739655, "learning_rate": 1.8861910070551597e-05, "loss": 0.5535, "step": 2113 }, { "epoch": 0.5350544166033916, "grad_norm": 0.14565078914165497, "learning_rate": 1.886080047810427e-05, "loss": 0.5402, "step": 2114 }, { "epoch": 0.5353075170842825, "grad_norm": 0.14789433777332306, "learning_rate": 1.8859690377686046e-05, "loss": 0.5256, "step": 2115 }, { "epoch": 0.5355606175651734, "grad_norm": 0.1485886126756668, "learning_rate": 1.885857976936057e-05, "loss": 0.5282, "step": 2116 }, { "epoch": 0.5358137180460643, "grad_norm": 0.1415485143661499, "learning_rate": 1.8857468653191503e-05, "loss": 0.5339, "step": 2117 }, { "epoch": 0.5360668185269551, "grad_norm": 0.1438799798488617, "learning_rate": 1.885635702924255e-05, "loss": 0.526, "step": 2118 }, { "epoch": 0.5363199190078461, "grad_norm": 0.14721158146858215, "learning_rate": 1.8855244897577432e-05, "loss": 0.5386, "step": 2119 }, { "epoch": 0.536573019488737, "grad_norm": 0.1481776386499405, "learning_rate": 1.885413225825991e-05, "loss": 0.5634, "step": 2120 }, { "epoch": 0.5368261199696279, "grad_norm": 0.14346905052661896, "learning_rate": 1.8853019111353766e-05, "loss": 0.5261, "step": 2121 }, { "epoch": 0.5370792204505188, "grad_norm": 0.14726269245147705, "learning_rate": 1.8851905456922818e-05, "loss": 0.525, "step": 2122 }, { "epoch": 0.5373323209314098, "grad_norm": 0.14352542161941528, "learning_rate": 1.8850791295030904e-05, "loss": 0.5329, "step": 2123 }, { "epoch": 0.5375854214123007, "grad_norm": 0.16002048552036285, "learning_rate": 1.8849676625741905e-05, "loss": 0.5259, "step": 2124 }, { "epoch": 0.5378385218931916, "grad_norm": 0.14723941683769226, "learning_rate": 1.8848561449119713e-05, "loss": 0.5293, "step": 2125 }, { "epoch": 0.5380916223740825, "grad_norm": 0.14481991529464722, "learning_rate": 1.8847445765228262e-05, "loss": 0.5285, "step": 2126 }, { "epoch": 0.5383447228549735, "grad_norm": 0.14677830040454865, "learning_rate": 1.8846329574131514e-05, "loss": 0.5222, "step": 2127 }, { "epoch": 0.5385978233358644, "grad_norm": 0.14093263447284698, "learning_rate": 1.8845212875893457e-05, "loss": 0.524, "step": 2128 }, { "epoch": 0.5388509238167553, "grad_norm": 0.1465352177619934, "learning_rate": 1.884409567057811e-05, "loss": 0.5328, "step": 2129 }, { "epoch": 0.5391040242976461, "grad_norm": 0.14699774980545044, "learning_rate": 1.884297795824952e-05, "loss": 0.5203, "step": 2130 }, { "epoch": 0.539357124778537, "grad_norm": 0.14181242883205414, "learning_rate": 1.8841859738971758e-05, "loss": 0.5508, "step": 2131 }, { "epoch": 0.539610225259428, "grad_norm": 0.14994683861732483, "learning_rate": 1.8840741012808937e-05, "loss": 0.5595, "step": 2132 }, { "epoch": 0.5398633257403189, "grad_norm": 0.14866229891777039, "learning_rate": 1.883962177982518e-05, "loss": 0.5573, "step": 2133 }, { "epoch": 0.5401164262212098, "grad_norm": 0.14127808809280396, "learning_rate": 1.8838502040084667e-05, "loss": 0.5283, "step": 2134 }, { "epoch": 0.5403695267021007, "grad_norm": 0.14179453253746033, "learning_rate": 1.8837381793651574e-05, "loss": 0.5463, "step": 2135 }, { "epoch": 0.5406226271829917, "grad_norm": 0.1535675823688507, "learning_rate": 1.8836261040590132e-05, "loss": 0.5538, "step": 2136 }, { "epoch": 0.5408757276638826, "grad_norm": 0.14412690699100494, "learning_rate": 1.8835139780964583e-05, "loss": 0.5468, "step": 2137 }, { "epoch": 0.5411288281447735, "grad_norm": 0.14546814560890198, "learning_rate": 1.8834018014839216e-05, "loss": 0.5348, "step": 2138 }, { "epoch": 0.5413819286256644, "grad_norm": 0.16463163495063782, "learning_rate": 1.883289574227834e-05, "loss": 0.55, "step": 2139 }, { "epoch": 0.5416350291065553, "grad_norm": 0.14167420566082, "learning_rate": 1.883177296334628e-05, "loss": 0.5556, "step": 2140 }, { "epoch": 0.5418881295874463, "grad_norm": 0.1812027245759964, "learning_rate": 1.8830649678107415e-05, "loss": 0.5446, "step": 2141 }, { "epoch": 0.5421412300683371, "grad_norm": 0.1430598348379135, "learning_rate": 1.8829525886626135e-05, "loss": 0.5163, "step": 2142 }, { "epoch": 0.542394330549228, "grad_norm": 0.16585777699947357, "learning_rate": 1.8828401588966864e-05, "loss": 0.5631, "step": 2143 }, { "epoch": 0.5426474310301189, "grad_norm": 0.14548391103744507, "learning_rate": 1.8827276785194056e-05, "loss": 0.5474, "step": 2144 }, { "epoch": 0.5429005315110099, "grad_norm": 0.1467544436454773, "learning_rate": 1.88261514753722e-05, "loss": 0.5493, "step": 2145 }, { "epoch": 0.5431536319919008, "grad_norm": 0.14689014852046967, "learning_rate": 1.8825025659565798e-05, "loss": 0.5416, "step": 2146 }, { "epoch": 0.5434067324727917, "grad_norm": 0.14642195403575897, "learning_rate": 1.8823899337839394e-05, "loss": 0.5426, "step": 2147 }, { "epoch": 0.5436598329536826, "grad_norm": 0.14804759621620178, "learning_rate": 1.8822772510257562e-05, "loss": 0.5637, "step": 2148 }, { "epoch": 0.5439129334345735, "grad_norm": 0.1435912698507309, "learning_rate": 1.8821645176884894e-05, "loss": 0.5184, "step": 2149 }, { "epoch": 0.5441660339154645, "grad_norm": 0.1469103991985321, "learning_rate": 1.8820517337786023e-05, "loss": 0.5412, "step": 2150 }, { "epoch": 0.5444191343963554, "grad_norm": 0.1398661583662033, "learning_rate": 1.8819388993025603e-05, "loss": 0.544, "step": 2151 }, { "epoch": 0.5446722348772463, "grad_norm": 0.15352042019367218, "learning_rate": 1.881826014266832e-05, "loss": 0.5487, "step": 2152 }, { "epoch": 0.5449253353581371, "grad_norm": 0.14718757569789886, "learning_rate": 1.8817130786778888e-05, "loss": 0.5265, "step": 2153 }, { "epoch": 0.5451784358390281, "grad_norm": 0.1449514478445053, "learning_rate": 1.8816000925422053e-05, "loss": 0.5498, "step": 2154 }, { "epoch": 0.545431536319919, "grad_norm": 0.14808648824691772, "learning_rate": 1.8814870558662582e-05, "loss": 0.5468, "step": 2155 }, { "epoch": 0.5456846368008099, "grad_norm": 0.14138783514499664, "learning_rate": 1.8813739686565285e-05, "loss": 0.5224, "step": 2156 }, { "epoch": 0.5459377372817008, "grad_norm": 0.14915712177753448, "learning_rate": 1.881260830919499e-05, "loss": 0.5228, "step": 2157 }, { "epoch": 0.5461908377625917, "grad_norm": 0.14445608854293823, "learning_rate": 1.881147642661655e-05, "loss": 0.5146, "step": 2158 }, { "epoch": 0.5464439382434827, "grad_norm": 0.14614073932170868, "learning_rate": 1.8810344038894857e-05, "loss": 0.5391, "step": 2159 }, { "epoch": 0.5466970387243736, "grad_norm": 0.14145520329475403, "learning_rate": 1.880921114609483e-05, "loss": 0.5195, "step": 2160 }, { "epoch": 0.5469501392052645, "grad_norm": 0.1484239399433136, "learning_rate": 1.8808077748281415e-05, "loss": 0.569, "step": 2161 }, { "epoch": 0.5472032396861554, "grad_norm": 0.14622029662132263, "learning_rate": 1.8806943845519586e-05, "loss": 0.526, "step": 2162 }, { "epoch": 0.5474563401670464, "grad_norm": 0.14384983479976654, "learning_rate": 1.8805809437874346e-05, "loss": 0.5245, "step": 2163 }, { "epoch": 0.5477094406479373, "grad_norm": 0.1476360708475113, "learning_rate": 1.8804674525410734e-05, "loss": 0.536, "step": 2164 }, { "epoch": 0.5479625411288281, "grad_norm": 0.14406946301460266, "learning_rate": 1.880353910819381e-05, "loss": 0.5128, "step": 2165 }, { "epoch": 0.548215641609719, "grad_norm": 0.14427940547466278, "learning_rate": 1.8802403186288657e-05, "loss": 0.5478, "step": 2166 }, { "epoch": 0.5484687420906099, "grad_norm": 0.14759938418865204, "learning_rate": 1.8801266759760408e-05, "loss": 0.5471, "step": 2167 }, { "epoch": 0.5487218425715009, "grad_norm": 0.1455715447664261, "learning_rate": 1.8800129828674202e-05, "loss": 0.5435, "step": 2168 }, { "epoch": 0.5489749430523918, "grad_norm": 0.14836999773979187, "learning_rate": 1.879899239309522e-05, "loss": 0.5447, "step": 2169 }, { "epoch": 0.5492280435332827, "grad_norm": 0.14490824937820435, "learning_rate": 1.879785445308867e-05, "loss": 0.5628, "step": 2170 }, { "epoch": 0.5494811440141736, "grad_norm": 0.14529091119766235, "learning_rate": 1.8796716008719784e-05, "loss": 0.542, "step": 2171 }, { "epoch": 0.5497342444950646, "grad_norm": 0.14771731197834015, "learning_rate": 1.879557706005383e-05, "loss": 0.5596, "step": 2172 }, { "epoch": 0.5499873449759555, "grad_norm": 0.1474103182554245, "learning_rate": 1.8794437607156105e-05, "loss": 0.5354, "step": 2173 }, { "epoch": 0.5502404454568464, "grad_norm": 0.14134001731872559, "learning_rate": 1.8793297650091922e-05, "loss": 0.5248, "step": 2174 }, { "epoch": 0.5504935459377372, "grad_norm": 0.1465873420238495, "learning_rate": 1.8792157188926642e-05, "loss": 0.5564, "step": 2175 }, { "epoch": 0.5507466464186282, "grad_norm": 0.14788149297237396, "learning_rate": 1.8791016223725634e-05, "loss": 0.5726, "step": 2176 }, { "epoch": 0.5509997468995191, "grad_norm": 0.15029706060886383, "learning_rate": 1.878987475455432e-05, "loss": 0.5511, "step": 2177 }, { "epoch": 0.55125284738041, "grad_norm": 0.14880117774009705, "learning_rate": 1.878873278147813e-05, "loss": 0.5333, "step": 2178 }, { "epoch": 0.5515059478613009, "grad_norm": 0.1465713381767273, "learning_rate": 1.878759030456253e-05, "loss": 0.5935, "step": 2179 }, { "epoch": 0.5517590483421918, "grad_norm": 0.14811015129089355, "learning_rate": 1.878644732387302e-05, "loss": 0.5665, "step": 2180 }, { "epoch": 0.5520121488230828, "grad_norm": 0.14152702689170837, "learning_rate": 1.8785303839475126e-05, "loss": 0.5455, "step": 2181 }, { "epoch": 0.5522652493039737, "grad_norm": 0.14692920446395874, "learning_rate": 1.8784159851434395e-05, "loss": 0.5515, "step": 2182 }, { "epoch": 0.5525183497848646, "grad_norm": 0.14347903430461884, "learning_rate": 1.8783015359816415e-05, "loss": 0.5495, "step": 2183 }, { "epoch": 0.5527714502657555, "grad_norm": 0.1463506817817688, "learning_rate": 1.8781870364686795e-05, "loss": 0.525, "step": 2184 }, { "epoch": 0.5530245507466465, "grad_norm": 0.14531154930591583, "learning_rate": 1.8780724866111175e-05, "loss": 0.51, "step": 2185 }, { "epoch": 0.5532776512275374, "grad_norm": 0.1414245069026947, "learning_rate": 1.8779578864155222e-05, "loss": 0.5449, "step": 2186 }, { "epoch": 0.5535307517084282, "grad_norm": 0.15202537178993225, "learning_rate": 1.877843235888464e-05, "loss": 0.5324, "step": 2187 }, { "epoch": 0.5537838521893191, "grad_norm": 0.14601637423038483, "learning_rate": 1.8777285350365153e-05, "loss": 0.5444, "step": 2188 }, { "epoch": 0.55403695267021, "grad_norm": 0.14495153725147247, "learning_rate": 1.8776137838662513e-05, "loss": 0.5449, "step": 2189 }, { "epoch": 0.554290053151101, "grad_norm": 0.14015650749206543, "learning_rate": 1.8774989823842505e-05, "loss": 0.5079, "step": 2190 }, { "epoch": 0.5545431536319919, "grad_norm": 0.1459050178527832, "learning_rate": 1.877384130597095e-05, "loss": 0.5518, "step": 2191 }, { "epoch": 0.5547962541128828, "grad_norm": 0.14173124730587006, "learning_rate": 1.8772692285113684e-05, "loss": 0.5035, "step": 2192 }, { "epoch": 0.5550493545937737, "grad_norm": 0.14195701479911804, "learning_rate": 1.8771542761336574e-05, "loss": 0.5648, "step": 2193 }, { "epoch": 0.5553024550746647, "grad_norm": 0.15466424822807312, "learning_rate": 1.8770392734705523e-05, "loss": 0.5433, "step": 2194 }, { "epoch": 0.5555555555555556, "grad_norm": 0.14722977578639984, "learning_rate": 1.8769242205286463e-05, "loss": 0.5526, "step": 2195 }, { "epoch": 0.5558086560364465, "grad_norm": 0.1462264060974121, "learning_rate": 1.876809117314535e-05, "loss": 0.5403, "step": 2196 }, { "epoch": 0.5560617565173374, "grad_norm": 0.14811238646507263, "learning_rate": 1.876693963834817e-05, "loss": 0.559, "step": 2197 }, { "epoch": 0.5563148569982282, "grad_norm": 0.14408525824546814, "learning_rate": 1.8765787600960935e-05, "loss": 0.5405, "step": 2198 }, { "epoch": 0.5565679574791192, "grad_norm": 0.1455586701631546, "learning_rate": 1.8764635061049695e-05, "loss": 0.5599, "step": 2199 }, { "epoch": 0.5568210579600101, "grad_norm": 0.15099912881851196, "learning_rate": 1.8763482018680517e-05, "loss": 0.526, "step": 2200 }, { "epoch": 0.557074158440901, "grad_norm": 0.14662666618824005, "learning_rate": 1.8762328473919503e-05, "loss": 0.5466, "step": 2201 }, { "epoch": 0.5573272589217919, "grad_norm": 0.1466791331768036, "learning_rate": 1.8761174426832785e-05, "loss": 0.5418, "step": 2202 }, { "epoch": 0.5575803594026829, "grad_norm": 0.14384949207305908, "learning_rate": 1.876001987748652e-05, "loss": 0.4963, "step": 2203 }, { "epoch": 0.5578334598835738, "grad_norm": 0.1430201381444931, "learning_rate": 1.87588648259469e-05, "loss": 0.5282, "step": 2204 }, { "epoch": 0.5580865603644647, "grad_norm": 0.1408693492412567, "learning_rate": 1.875770927228014e-05, "loss": 0.549, "step": 2205 }, { "epoch": 0.5583396608453556, "grad_norm": 0.14347919821739197, "learning_rate": 1.8756553216552485e-05, "loss": 0.5466, "step": 2206 }, { "epoch": 0.5585927613262465, "grad_norm": 0.14288023114204407, "learning_rate": 1.8755396658830205e-05, "loss": 0.5286, "step": 2207 }, { "epoch": 0.5588458618071375, "grad_norm": 0.1465696394443512, "learning_rate": 1.875423959917961e-05, "loss": 0.5712, "step": 2208 }, { "epoch": 0.5590989622880284, "grad_norm": 0.15595479309558868, "learning_rate": 1.875308203766703e-05, "loss": 0.518, "step": 2209 }, { "epoch": 0.5593520627689192, "grad_norm": 0.14882028102874756, "learning_rate": 1.875192397435882e-05, "loss": 0.5272, "step": 2210 }, { "epoch": 0.5596051632498101, "grad_norm": 0.148061141371727, "learning_rate": 1.8750765409321375e-05, "loss": 0.5247, "step": 2211 }, { "epoch": 0.5598582637307011, "grad_norm": 0.145848348736763, "learning_rate": 1.874960634262111e-05, "loss": 0.5299, "step": 2212 }, { "epoch": 0.560111364211592, "grad_norm": 0.1456480175256729, "learning_rate": 1.8748446774324477e-05, "loss": 0.5334, "step": 2213 }, { "epoch": 0.5603644646924829, "grad_norm": 0.14741340279579163, "learning_rate": 1.8747286704497947e-05, "loss": 0.4874, "step": 2214 }, { "epoch": 0.5606175651733738, "grad_norm": 0.14687207341194153, "learning_rate": 1.874612613320802e-05, "loss": 0.5166, "step": 2215 }, { "epoch": 0.5608706656542647, "grad_norm": 0.1464032232761383, "learning_rate": 1.8744965060521245e-05, "loss": 0.554, "step": 2216 }, { "epoch": 0.5611237661351557, "grad_norm": 0.16772732138633728, "learning_rate": 1.8743803486504168e-05, "loss": 0.5445, "step": 2217 }, { "epoch": 0.5613768666160466, "grad_norm": 0.1479538083076477, "learning_rate": 1.874264141122338e-05, "loss": 0.5392, "step": 2218 }, { "epoch": 0.5616299670969375, "grad_norm": 0.1854136735200882, "learning_rate": 1.874147883474551e-05, "loss": 0.5588, "step": 2219 }, { "epoch": 0.5618830675778284, "grad_norm": 0.14578451216220856, "learning_rate": 1.8740315757137204e-05, "loss": 0.5041, "step": 2220 }, { "epoch": 0.5621361680587194, "grad_norm": 0.1439344435930252, "learning_rate": 1.8739152178465133e-05, "loss": 0.5386, "step": 2221 }, { "epoch": 0.5623892685396102, "grad_norm": 0.14381521940231323, "learning_rate": 1.8737988098796e-05, "loss": 0.5571, "step": 2222 }, { "epoch": 0.5626423690205011, "grad_norm": 0.15286916494369507, "learning_rate": 1.8736823518196556e-05, "loss": 0.5573, "step": 2223 }, { "epoch": 0.562895469501392, "grad_norm": 0.15535466372966766, "learning_rate": 1.8735658436733545e-05, "loss": 0.5573, "step": 2224 }, { "epoch": 0.563148569982283, "grad_norm": 0.14613652229309082, "learning_rate": 1.8734492854473766e-05, "loss": 0.5397, "step": 2225 }, { "epoch": 0.5634016704631739, "grad_norm": 0.1451738327741623, "learning_rate": 1.8733326771484043e-05, "loss": 0.5629, "step": 2226 }, { "epoch": 0.5636547709440648, "grad_norm": 0.14860254526138306, "learning_rate": 1.873216018783122e-05, "loss": 0.5463, "step": 2227 }, { "epoch": 0.5639078714249557, "grad_norm": 0.15874744951725006, "learning_rate": 1.8730993103582177e-05, "loss": 0.5412, "step": 2228 }, { "epoch": 0.5641609719058466, "grad_norm": 0.1528647392988205, "learning_rate": 1.8729825518803818e-05, "loss": 0.5532, "step": 2229 }, { "epoch": 0.5644140723867376, "grad_norm": 0.16232876479625702, "learning_rate": 1.8728657433563082e-05, "loss": 0.5397, "step": 2230 }, { "epoch": 0.5646671728676285, "grad_norm": 0.1460360735654831, "learning_rate": 1.872748884792693e-05, "loss": 0.5474, "step": 2231 }, { "epoch": 0.5649202733485194, "grad_norm": 0.14589981734752655, "learning_rate": 1.8726319761962358e-05, "loss": 0.5209, "step": 2232 }, { "epoch": 0.5651733738294102, "grad_norm": 0.14501315355300903, "learning_rate": 1.8725150175736383e-05, "loss": 0.5467, "step": 2233 }, { "epoch": 0.5654264743103012, "grad_norm": 0.150316059589386, "learning_rate": 1.8723980089316054e-05, "loss": 0.5632, "step": 2234 }, { "epoch": 0.5656795747911921, "grad_norm": 0.14659665524959564, "learning_rate": 1.8722809502768455e-05, "loss": 0.5366, "step": 2235 }, { "epoch": 0.565932675272083, "grad_norm": 0.14401592314243317, "learning_rate": 1.872163841616069e-05, "loss": 0.534, "step": 2236 }, { "epoch": 0.5661857757529739, "grad_norm": 0.14456796646118164, "learning_rate": 1.8720466829559896e-05, "loss": 0.5673, "step": 2237 }, { "epoch": 0.5664388762338648, "grad_norm": 0.1423196941614151, "learning_rate": 1.8719294743033235e-05, "loss": 0.5155, "step": 2238 }, { "epoch": 0.5666919767147558, "grad_norm": 0.1501038521528244, "learning_rate": 1.8718122156647905e-05, "loss": 0.5379, "step": 2239 }, { "epoch": 0.5669450771956467, "grad_norm": 0.14496496319770813, "learning_rate": 1.8716949070471123e-05, "loss": 0.5323, "step": 2240 }, { "epoch": 0.5671981776765376, "grad_norm": 0.18220806121826172, "learning_rate": 1.871577548457014e-05, "loss": 0.5159, "step": 2241 }, { "epoch": 0.5674512781574285, "grad_norm": 0.14391760528087616, "learning_rate": 1.871460139901224e-05, "loss": 0.5315, "step": 2242 }, { "epoch": 0.5677043786383195, "grad_norm": 0.14471866190433502, "learning_rate": 1.8713426813864727e-05, "loss": 0.5259, "step": 2243 }, { "epoch": 0.5679574791192104, "grad_norm": 0.15548956394195557, "learning_rate": 1.8712251729194936e-05, "loss": 0.5802, "step": 2244 }, { "epoch": 0.5682105796001012, "grad_norm": 0.14285972714424133, "learning_rate": 1.8711076145070234e-05, "loss": 0.5192, "step": 2245 }, { "epoch": 0.5684636800809921, "grad_norm": 0.14926937222480774, "learning_rate": 1.8709900061558016e-05, "loss": 0.5381, "step": 2246 }, { "epoch": 0.568716780561883, "grad_norm": 0.14849525690078735, "learning_rate": 1.8708723478725704e-05, "loss": 0.5575, "step": 2247 }, { "epoch": 0.568969881042774, "grad_norm": 0.14708726108074188, "learning_rate": 1.870754639664075e-05, "loss": 0.5146, "step": 2248 }, { "epoch": 0.5692229815236649, "grad_norm": 0.1492762565612793, "learning_rate": 1.870636881537063e-05, "loss": 0.5367, "step": 2249 }, { "epoch": 0.5694760820045558, "grad_norm": 0.1519898623228073, "learning_rate": 1.8705190734982853e-05, "loss": 0.5548, "step": 2250 }, { "epoch": 0.5697291824854467, "grad_norm": 0.1466929167509079, "learning_rate": 1.8704012155544958e-05, "loss": 0.5345, "step": 2251 }, { "epoch": 0.5699822829663377, "grad_norm": 0.1472339779138565, "learning_rate": 1.8702833077124508e-05, "loss": 0.5272, "step": 2252 }, { "epoch": 0.5702353834472286, "grad_norm": 0.13981996476650238, "learning_rate": 1.87016534997891e-05, "loss": 0.5352, "step": 2253 }, { "epoch": 0.5704884839281195, "grad_norm": 0.1445668786764145, "learning_rate": 1.8700473423606355e-05, "loss": 0.5247, "step": 2254 }, { "epoch": 0.5707415844090103, "grad_norm": 0.1454024463891983, "learning_rate": 1.8699292848643926e-05, "loss": 0.5476, "step": 2255 }, { "epoch": 0.5709946848899012, "grad_norm": 0.15462176501750946, "learning_rate": 1.8698111774969488e-05, "loss": 0.541, "step": 2256 }, { "epoch": 0.5712477853707922, "grad_norm": 0.1474360227584839, "learning_rate": 1.8696930202650755e-05, "loss": 0.5543, "step": 2257 }, { "epoch": 0.5715008858516831, "grad_norm": 0.14274410903453827, "learning_rate": 1.8695748131755463e-05, "loss": 0.5589, "step": 2258 }, { "epoch": 0.571753986332574, "grad_norm": 0.14441858232021332, "learning_rate": 1.8694565562351374e-05, "loss": 0.5442, "step": 2259 }, { "epoch": 0.5720070868134649, "grad_norm": 0.1430259644985199, "learning_rate": 1.869338249450629e-05, "loss": 0.5226, "step": 2260 }, { "epoch": 0.5722601872943559, "grad_norm": 0.14691650867462158, "learning_rate": 1.869219892828802e-05, "loss": 0.5594, "step": 2261 }, { "epoch": 0.5725132877752468, "grad_norm": 0.15393652021884918, "learning_rate": 1.8691014863764426e-05, "loss": 0.5436, "step": 2262 }, { "epoch": 0.5727663882561377, "grad_norm": 0.1516573429107666, "learning_rate": 1.8689830301003387e-05, "loss": 0.5349, "step": 2263 }, { "epoch": 0.5730194887370286, "grad_norm": 0.147565558552742, "learning_rate": 1.8688645240072813e-05, "loss": 0.5319, "step": 2264 }, { "epoch": 0.5732725892179195, "grad_norm": 0.14211657643318176, "learning_rate": 1.8687459681040634e-05, "loss": 0.504, "step": 2265 }, { "epoch": 0.5735256896988105, "grad_norm": 0.15049326419830322, "learning_rate": 1.868627362397482e-05, "loss": 0.5531, "step": 2266 }, { "epoch": 0.5737787901797013, "grad_norm": 0.14671093225479126, "learning_rate": 1.8685087068943365e-05, "loss": 0.5268, "step": 2267 }, { "epoch": 0.5740318906605922, "grad_norm": 0.1650523841381073, "learning_rate": 1.8683900016014292e-05, "loss": 0.5423, "step": 2268 }, { "epoch": 0.5742849911414831, "grad_norm": 0.1483929306268692, "learning_rate": 1.868271246525565e-05, "loss": 0.5411, "step": 2269 }, { "epoch": 0.5745380916223741, "grad_norm": 0.14577732980251312, "learning_rate": 1.8681524416735524e-05, "loss": 0.513, "step": 2270 }, { "epoch": 0.574791192103265, "grad_norm": 0.14955361187458038, "learning_rate": 1.868033587052202e-05, "loss": 0.5267, "step": 2271 }, { "epoch": 0.5750442925841559, "grad_norm": 0.1502561718225479, "learning_rate": 1.867914682668327e-05, "loss": 0.5302, "step": 2272 }, { "epoch": 0.5752973930650468, "grad_norm": 0.1462802290916443, "learning_rate": 1.867795728528744e-05, "loss": 0.5489, "step": 2273 }, { "epoch": 0.5755504935459378, "grad_norm": 0.1433577835559845, "learning_rate": 1.8676767246402735e-05, "loss": 0.5323, "step": 2274 }, { "epoch": 0.5758035940268287, "grad_norm": 0.1570679247379303, "learning_rate": 1.8675576710097365e-05, "loss": 0.5642, "step": 2275 }, { "epoch": 0.5760566945077196, "grad_norm": 0.14700014889240265, "learning_rate": 1.8674385676439584e-05, "loss": 0.5428, "step": 2276 }, { "epoch": 0.5763097949886105, "grad_norm": 0.14331628382205963, "learning_rate": 1.8673194145497675e-05, "loss": 0.5405, "step": 2277 }, { "epoch": 0.5765628954695013, "grad_norm": 0.1505105346441269, "learning_rate": 1.8672002117339944e-05, "loss": 0.5564, "step": 2278 }, { "epoch": 0.5768159959503923, "grad_norm": 0.1457102745771408, "learning_rate": 1.8670809592034724e-05, "loss": 0.5481, "step": 2279 }, { "epoch": 0.5770690964312832, "grad_norm": 0.15212388336658478, "learning_rate": 1.8669616569650384e-05, "loss": 0.5633, "step": 2280 }, { "epoch": 0.5773221969121741, "grad_norm": 0.14920096099376678, "learning_rate": 1.8668423050255317e-05, "loss": 0.5369, "step": 2281 }, { "epoch": 0.577575297393065, "grad_norm": 0.14896419644355774, "learning_rate": 1.8667229033917946e-05, "loss": 0.5323, "step": 2282 }, { "epoch": 0.577828397873956, "grad_norm": 0.14505890011787415, "learning_rate": 1.8666034520706718e-05, "loss": 0.5552, "step": 2283 }, { "epoch": 0.5780814983548469, "grad_norm": 0.14572517573833466, "learning_rate": 1.8664839510690114e-05, "loss": 0.5367, "step": 2284 }, { "epoch": 0.5783345988357378, "grad_norm": 0.13905175030231476, "learning_rate": 1.8663644003936642e-05, "loss": 0.5251, "step": 2285 }, { "epoch": 0.5785876993166287, "grad_norm": 0.1470813900232315, "learning_rate": 1.8662448000514834e-05, "loss": 0.54, "step": 2286 }, { "epoch": 0.5788407997975196, "grad_norm": 0.1497400999069214, "learning_rate": 1.866125150049326e-05, "loss": 0.5415, "step": 2287 }, { "epoch": 0.5790939002784106, "grad_norm": 0.15024422109127045, "learning_rate": 1.866005450394051e-05, "loss": 0.5501, "step": 2288 }, { "epoch": 0.5793470007593015, "grad_norm": 0.14332029223442078, "learning_rate": 1.8658857010925205e-05, "loss": 0.5296, "step": 2289 }, { "epoch": 0.5796001012401923, "grad_norm": 0.14805105328559875, "learning_rate": 1.8657659021515996e-05, "loss": 0.5874, "step": 2290 }, { "epoch": 0.5798532017210832, "grad_norm": 0.14845344424247742, "learning_rate": 1.8656460535781557e-05, "loss": 0.5426, "step": 2291 }, { "epoch": 0.5801063022019742, "grad_norm": 0.15134577453136444, "learning_rate": 1.86552615537906e-05, "loss": 0.5523, "step": 2292 }, { "epoch": 0.5803594026828651, "grad_norm": 0.14015473425388336, "learning_rate": 1.8654062075611857e-05, "loss": 0.5131, "step": 2293 }, { "epoch": 0.580612503163756, "grad_norm": 0.16274476051330566, "learning_rate": 1.8652862101314097e-05, "loss": 0.5529, "step": 2294 }, { "epoch": 0.5808656036446469, "grad_norm": 0.15004579722881317, "learning_rate": 1.86516616309661e-05, "loss": 0.5347, "step": 2295 }, { "epoch": 0.5811187041255378, "grad_norm": 0.14771991968154907, "learning_rate": 1.8650460664636702e-05, "loss": 0.5313, "step": 2296 }, { "epoch": 0.5813718046064288, "grad_norm": 0.15148195624351501, "learning_rate": 1.864925920239474e-05, "loss": 0.5418, "step": 2297 }, { "epoch": 0.5816249050873197, "grad_norm": 0.14730021357536316, "learning_rate": 1.8648057244309094e-05, "loss": 0.5132, "step": 2298 }, { "epoch": 0.5818780055682106, "grad_norm": 0.15396273136138916, "learning_rate": 1.8646854790448673e-05, "loss": 0.5329, "step": 2299 }, { "epoch": 0.5821311060491015, "grad_norm": 0.14523978531360626, "learning_rate": 1.8645651840882407e-05, "loss": 0.5529, "step": 2300 }, { "epoch": 0.5823842065299925, "grad_norm": 0.1450643390417099, "learning_rate": 1.8644448395679263e-05, "loss": 0.5157, "step": 2301 }, { "epoch": 0.5826373070108833, "grad_norm": 0.1512904018163681, "learning_rate": 1.8643244454908224e-05, "loss": 0.5279, "step": 2302 }, { "epoch": 0.5828904074917742, "grad_norm": 0.15034013986587524, "learning_rate": 1.864204001863832e-05, "loss": 0.5261, "step": 2303 }, { "epoch": 0.5831435079726651, "grad_norm": 0.14855656027793884, "learning_rate": 1.8640835086938593e-05, "loss": 0.5558, "step": 2304 }, { "epoch": 0.583396608453556, "grad_norm": 0.14781597256660461, "learning_rate": 1.863962965987812e-05, "loss": 0.5356, "step": 2305 }, { "epoch": 0.583649708934447, "grad_norm": 0.14822877943515778, "learning_rate": 1.8638423737526004e-05, "loss": 0.5268, "step": 2306 }, { "epoch": 0.5839028094153379, "grad_norm": 0.14480765163898468, "learning_rate": 1.863721731995138e-05, "loss": 0.5303, "step": 2307 }, { "epoch": 0.5841559098962288, "grad_norm": 0.1457347571849823, "learning_rate": 1.863601040722341e-05, "loss": 0.5483, "step": 2308 }, { "epoch": 0.5844090103771197, "grad_norm": 0.150473952293396, "learning_rate": 1.863480299941128e-05, "loss": 0.5518, "step": 2309 }, { "epoch": 0.5846621108580107, "grad_norm": 0.1446637511253357, "learning_rate": 1.863359509658421e-05, "loss": 0.5555, "step": 2310 }, { "epoch": 0.5849152113389016, "grad_norm": 0.15238513052463531, "learning_rate": 1.8632386698811448e-05, "loss": 0.5533, "step": 2311 }, { "epoch": 0.5851683118197925, "grad_norm": 0.14772778749465942, "learning_rate": 1.863117780616227e-05, "loss": 0.5573, "step": 2312 }, { "epoch": 0.5854214123006833, "grad_norm": 0.15186363458633423, "learning_rate": 1.8629968418705976e-05, "loss": 0.5362, "step": 2313 }, { "epoch": 0.5856745127815742, "grad_norm": 0.16332495212554932, "learning_rate": 1.8628758536511897e-05, "loss": 0.5473, "step": 2314 }, { "epoch": 0.5859276132624652, "grad_norm": 0.16811923682689667, "learning_rate": 1.8627548159649395e-05, "loss": 0.5479, "step": 2315 }, { "epoch": 0.5861807137433561, "grad_norm": 0.14447632431983948, "learning_rate": 1.8626337288187862e-05, "loss": 0.5457, "step": 2316 }, { "epoch": 0.586433814224247, "grad_norm": 0.1498802751302719, "learning_rate": 1.8625125922196707e-05, "loss": 0.5309, "step": 2317 }, { "epoch": 0.5866869147051379, "grad_norm": 0.14454385638237, "learning_rate": 1.862391406174538e-05, "loss": 0.5439, "step": 2318 }, { "epoch": 0.5869400151860289, "grad_norm": 0.1505521684885025, "learning_rate": 1.8622701706903354e-05, "loss": 0.5367, "step": 2319 }, { "epoch": 0.5871931156669198, "grad_norm": 0.14599654078483582, "learning_rate": 1.862148885774013e-05, "loss": 0.5464, "step": 2320 }, { "epoch": 0.5874462161478107, "grad_norm": 0.14601285755634308, "learning_rate": 1.8620275514325236e-05, "loss": 0.5436, "step": 2321 }, { "epoch": 0.5876993166287016, "grad_norm": 0.14640100300312042, "learning_rate": 1.8619061676728235e-05, "loss": 0.5207, "step": 2322 }, { "epoch": 0.5879524171095926, "grad_norm": 0.14489838480949402, "learning_rate": 1.861784734501871e-05, "loss": 0.5487, "step": 2323 }, { "epoch": 0.5882055175904835, "grad_norm": 0.14972306787967682, "learning_rate": 1.861663251926628e-05, "loss": 0.5453, "step": 2324 }, { "epoch": 0.5884586180713743, "grad_norm": 0.14117872714996338, "learning_rate": 1.861541719954058e-05, "loss": 0.5155, "step": 2325 }, { "epoch": 0.5887117185522652, "grad_norm": 0.1484600156545639, "learning_rate": 1.8614201385911292e-05, "loss": 0.5498, "step": 2326 }, { "epoch": 0.5889648190331561, "grad_norm": 0.15175874531269073, "learning_rate": 1.861298507844811e-05, "loss": 0.528, "step": 2327 }, { "epoch": 0.5892179195140471, "grad_norm": 0.1560920774936676, "learning_rate": 1.8611768277220764e-05, "loss": 0.5315, "step": 2328 }, { "epoch": 0.589471019994938, "grad_norm": 0.14526920020580292, "learning_rate": 1.8610550982299007e-05, "loss": 0.5317, "step": 2329 }, { "epoch": 0.5897241204758289, "grad_norm": 0.15285490453243256, "learning_rate": 1.860933319375263e-05, "loss": 0.5376, "step": 2330 }, { "epoch": 0.5899772209567198, "grad_norm": 0.14467842876911163, "learning_rate": 1.860811491165144e-05, "loss": 0.514, "step": 2331 }, { "epoch": 0.5902303214376108, "grad_norm": 0.14383679628372192, "learning_rate": 1.8606896136065288e-05, "loss": 0.5353, "step": 2332 }, { "epoch": 0.5904834219185017, "grad_norm": 0.14506690204143524, "learning_rate": 1.8605676867064034e-05, "loss": 0.5497, "step": 2333 }, { "epoch": 0.5907365223993926, "grad_norm": 0.1569226235151291, "learning_rate": 1.8604457104717582e-05, "loss": 0.546, "step": 2334 }, { "epoch": 0.5909896228802834, "grad_norm": 0.14538037776947021, "learning_rate": 1.860323684909586e-05, "loss": 0.5406, "step": 2335 }, { "epoch": 0.5912427233611743, "grad_norm": 0.14282667636871338, "learning_rate": 1.8602016100268812e-05, "loss": 0.511, "step": 2336 }, { "epoch": 0.5914958238420653, "grad_norm": 0.14653204381465912, "learning_rate": 1.860079485830643e-05, "loss": 0.5259, "step": 2337 }, { "epoch": 0.5917489243229562, "grad_norm": 0.14614234864711761, "learning_rate": 1.8599573123278724e-05, "loss": 0.5176, "step": 2338 }, { "epoch": 0.5920020248038471, "grad_norm": 0.14530158042907715, "learning_rate": 1.8598350895255734e-05, "loss": 0.5582, "step": 2339 }, { "epoch": 0.592255125284738, "grad_norm": 0.1459869146347046, "learning_rate": 1.8597128174307524e-05, "loss": 0.5513, "step": 2340 }, { "epoch": 0.592508225765629, "grad_norm": 0.14628392457962036, "learning_rate": 1.859590496050419e-05, "loss": 0.5419, "step": 2341 }, { "epoch": 0.5927613262465199, "grad_norm": 0.15488635003566742, "learning_rate": 1.8594681253915864e-05, "loss": 0.5631, "step": 2342 }, { "epoch": 0.5930144267274108, "grad_norm": 0.15236783027648926, "learning_rate": 1.859345705461269e-05, "loss": 0.5406, "step": 2343 }, { "epoch": 0.5932675272083017, "grad_norm": 0.14610011875629425, "learning_rate": 1.8592232362664853e-05, "loss": 0.5398, "step": 2344 }, { "epoch": 0.5935206276891926, "grad_norm": 0.15310116112232208, "learning_rate": 1.859100717814256e-05, "loss": 0.538, "step": 2345 }, { "epoch": 0.5937737281700836, "grad_norm": 0.14699193835258484, "learning_rate": 1.858978150111605e-05, "loss": 0.4988, "step": 2346 }, { "epoch": 0.5940268286509744, "grad_norm": 0.1398555487394333, "learning_rate": 1.858855533165559e-05, "loss": 0.5139, "step": 2347 }, { "epoch": 0.5942799291318653, "grad_norm": 0.14508651196956635, "learning_rate": 1.8587328669831467e-05, "loss": 0.5433, "step": 2348 }, { "epoch": 0.5945330296127562, "grad_norm": 0.1478363573551178, "learning_rate": 1.858610151571401e-05, "loss": 0.5223, "step": 2349 }, { "epoch": 0.5947861300936472, "grad_norm": 0.14768554270267487, "learning_rate": 1.858487386937356e-05, "loss": 0.5343, "step": 2350 }, { "epoch": 0.5950392305745381, "grad_norm": 0.14980356395244598, "learning_rate": 1.8583645730880508e-05, "loss": 0.5467, "step": 2351 }, { "epoch": 0.595292331055429, "grad_norm": 0.14696039259433746, "learning_rate": 1.8582417100305252e-05, "loss": 0.555, "step": 2352 }, { "epoch": 0.5955454315363199, "grad_norm": 0.17784389853477478, "learning_rate": 1.858118797771823e-05, "loss": 0.5386, "step": 2353 }, { "epoch": 0.5957985320172108, "grad_norm": 0.14381876587867737, "learning_rate": 1.8579958363189906e-05, "loss": 0.5349, "step": 2354 }, { "epoch": 0.5960516324981018, "grad_norm": 0.15086035430431366, "learning_rate": 1.857872825679077e-05, "loss": 0.5461, "step": 2355 }, { "epoch": 0.5963047329789927, "grad_norm": 0.15046915411949158, "learning_rate": 1.8577497658591333e-05, "loss": 0.5328, "step": 2356 }, { "epoch": 0.5965578334598836, "grad_norm": 0.14956995844841003, "learning_rate": 1.8576266568662156e-05, "loss": 0.5735, "step": 2357 }, { "epoch": 0.5968109339407744, "grad_norm": 0.1443953663110733, "learning_rate": 1.8575034987073806e-05, "loss": 0.5291, "step": 2358 }, { "epoch": 0.5970640344216654, "grad_norm": 0.1495794653892517, "learning_rate": 1.8573802913896895e-05, "loss": 0.5279, "step": 2359 }, { "epoch": 0.5973171349025563, "grad_norm": 0.14987659454345703, "learning_rate": 1.8572570349202045e-05, "loss": 0.5253, "step": 2360 }, { "epoch": 0.5975702353834472, "grad_norm": 0.14494718611240387, "learning_rate": 1.8571337293059923e-05, "loss": 0.5352, "step": 2361 }, { "epoch": 0.5978233358643381, "grad_norm": 0.14781028032302856, "learning_rate": 1.857010374554122e-05, "loss": 0.5185, "step": 2362 }, { "epoch": 0.598076436345229, "grad_norm": 0.14690136909484863, "learning_rate": 1.856886970671664e-05, "loss": 0.5433, "step": 2363 }, { "epoch": 0.59832953682612, "grad_norm": 0.14500904083251953, "learning_rate": 1.8567635176656945e-05, "loss": 0.5261, "step": 2364 }, { "epoch": 0.5985826373070109, "grad_norm": 0.14209990203380585, "learning_rate": 1.8566400155432893e-05, "loss": 0.5517, "step": 2365 }, { "epoch": 0.5988357377879018, "grad_norm": 0.1500253528356552, "learning_rate": 1.8565164643115296e-05, "loss": 0.5416, "step": 2366 }, { "epoch": 0.5990888382687927, "grad_norm": 0.14733339846134186, "learning_rate": 1.8563928639774976e-05, "loss": 0.5387, "step": 2367 }, { "epoch": 0.5993419387496837, "grad_norm": 0.14200736582279205, "learning_rate": 1.8562692145482793e-05, "loss": 0.5811, "step": 2368 }, { "epoch": 0.5995950392305746, "grad_norm": 0.14497509598731995, "learning_rate": 1.8561455160309634e-05, "loss": 0.5104, "step": 2369 }, { "epoch": 0.5998481397114654, "grad_norm": 0.13897013664245605, "learning_rate": 1.856021768432641e-05, "loss": 0.5241, "step": 2370 }, { "epoch": 0.6001012401923563, "grad_norm": 0.14394894242286682, "learning_rate": 1.8558979717604065e-05, "loss": 0.5312, "step": 2371 }, { "epoch": 0.6003543406732473, "grad_norm": 0.14755356311798096, "learning_rate": 1.8557741260213572e-05, "loss": 0.5343, "step": 2372 }, { "epoch": 0.6006074411541382, "grad_norm": 0.15196183323860168, "learning_rate": 1.8556502312225922e-05, "loss": 0.568, "step": 2373 }, { "epoch": 0.6008605416350291, "grad_norm": 0.14456459879875183, "learning_rate": 1.8555262873712145e-05, "loss": 0.5162, "step": 2374 }, { "epoch": 0.60111364211592, "grad_norm": 0.15117591619491577, "learning_rate": 1.8554022944743296e-05, "loss": 0.5446, "step": 2375 }, { "epoch": 0.6013667425968109, "grad_norm": 0.14727413654327393, "learning_rate": 1.8552782525390458e-05, "loss": 0.5459, "step": 2376 }, { "epoch": 0.6016198430777019, "grad_norm": 0.14920492470264435, "learning_rate": 1.855154161572474e-05, "loss": 0.5383, "step": 2377 }, { "epoch": 0.6018729435585928, "grad_norm": 0.1506134271621704, "learning_rate": 1.8550300215817277e-05, "loss": 0.55, "step": 2378 }, { "epoch": 0.6021260440394837, "grad_norm": 0.14898306131362915, "learning_rate": 1.8549058325739246e-05, "loss": 0.5564, "step": 2379 }, { "epoch": 0.6023791445203746, "grad_norm": 0.14046598970890045, "learning_rate": 1.8547815945561832e-05, "loss": 0.5334, "step": 2380 }, { "epoch": 0.6026322450012656, "grad_norm": 0.14773893356323242, "learning_rate": 1.8546573075356265e-05, "loss": 0.5528, "step": 2381 }, { "epoch": 0.6028853454821564, "grad_norm": 0.14395421743392944, "learning_rate": 1.8545329715193786e-05, "loss": 0.5663, "step": 2382 }, { "epoch": 0.6031384459630473, "grad_norm": 0.156617671251297, "learning_rate": 1.854408586514569e-05, "loss": 0.5349, "step": 2383 }, { "epoch": 0.6033915464439382, "grad_norm": 0.15281963348388672, "learning_rate": 1.8542841525283268e-05, "loss": 0.5849, "step": 2384 }, { "epoch": 0.6036446469248291, "grad_norm": 0.1514674872159958, "learning_rate": 1.8541596695677868e-05, "loss": 0.5624, "step": 2385 }, { "epoch": 0.6038977474057201, "grad_norm": 0.1519140750169754, "learning_rate": 1.8540351376400848e-05, "loss": 0.5502, "step": 2386 }, { "epoch": 0.604150847886611, "grad_norm": 0.14331114292144775, "learning_rate": 1.8539105567523598e-05, "loss": 0.5499, "step": 2387 }, { "epoch": 0.6044039483675019, "grad_norm": 0.1429108828306198, "learning_rate": 1.8537859269117538e-05, "loss": 0.5287, "step": 2388 }, { "epoch": 0.6046570488483928, "grad_norm": 0.1414855569601059, "learning_rate": 1.8536612481254116e-05, "loss": 0.5485, "step": 2389 }, { "epoch": 0.6049101493292838, "grad_norm": 0.15185344219207764, "learning_rate": 1.8535365204004815e-05, "loss": 0.5415, "step": 2390 }, { "epoch": 0.6051632498101747, "grad_norm": 0.14885537326335907, "learning_rate": 1.8534117437441125e-05, "loss": 0.5474, "step": 2391 }, { "epoch": 0.6054163502910656, "grad_norm": 0.14955249428749084, "learning_rate": 1.853286918163459e-05, "loss": 0.5135, "step": 2392 }, { "epoch": 0.6056694507719564, "grad_norm": 0.1478264480829239, "learning_rate": 1.8531620436656762e-05, "loss": 0.5503, "step": 2393 }, { "epoch": 0.6059225512528473, "grad_norm": 0.14942239224910736, "learning_rate": 1.8530371202579238e-05, "loss": 0.517, "step": 2394 }, { "epoch": 0.6061756517337383, "grad_norm": 0.15352338552474976, "learning_rate": 1.852912147947362e-05, "loss": 0.5355, "step": 2395 }, { "epoch": 0.6064287522146292, "grad_norm": 0.14409947395324707, "learning_rate": 1.8527871267411564e-05, "loss": 0.5252, "step": 2396 }, { "epoch": 0.6066818526955201, "grad_norm": 0.14866919815540314, "learning_rate": 1.852662056646474e-05, "loss": 0.5382, "step": 2397 }, { "epoch": 0.606934953176411, "grad_norm": 0.14645080268383026, "learning_rate": 1.852536937670484e-05, "loss": 0.533, "step": 2398 }, { "epoch": 0.607188053657302, "grad_norm": 0.1523124724626541, "learning_rate": 1.8524117698203605e-05, "loss": 0.5808, "step": 2399 }, { "epoch": 0.6074411541381929, "grad_norm": 0.14742736518383026, "learning_rate": 1.852286553103278e-05, "loss": 0.5433, "step": 2400 }, { "epoch": 0.6076942546190838, "grad_norm": 0.15319399535655975, "learning_rate": 1.8521612875264154e-05, "loss": 0.5473, "step": 2401 }, { "epoch": 0.6079473550999747, "grad_norm": 0.15313813090324402, "learning_rate": 1.852035973096954e-05, "loss": 0.5392, "step": 2402 }, { "epoch": 0.6082004555808656, "grad_norm": 0.1409512609243393, "learning_rate": 1.8519106098220775e-05, "loss": 0.5332, "step": 2403 }, { "epoch": 0.6084535560617566, "grad_norm": 0.14349418878555298, "learning_rate": 1.8517851977089727e-05, "loss": 0.5146, "step": 2404 }, { "epoch": 0.6087066565426474, "grad_norm": 0.15203545987606049, "learning_rate": 1.8516597367648295e-05, "loss": 0.5236, "step": 2405 }, { "epoch": 0.6089597570235383, "grad_norm": 0.15104606747627258, "learning_rate": 1.85153422699684e-05, "loss": 0.5633, "step": 2406 }, { "epoch": 0.6092128575044292, "grad_norm": 0.14073584973812103, "learning_rate": 1.8514086684121997e-05, "loss": 0.5533, "step": 2407 }, { "epoch": 0.6094659579853202, "grad_norm": 0.14999449253082275, "learning_rate": 1.8512830610181067e-05, "loss": 0.5343, "step": 2408 }, { "epoch": 0.6097190584662111, "grad_norm": 0.14399008452892303, "learning_rate": 1.8511574048217614e-05, "loss": 0.515, "step": 2409 }, { "epoch": 0.609972158947102, "grad_norm": 0.14407195150852203, "learning_rate": 1.8510316998303675e-05, "loss": 0.5277, "step": 2410 }, { "epoch": 0.6102252594279929, "grad_norm": 0.14424079656600952, "learning_rate": 1.8509059460511315e-05, "loss": 0.5507, "step": 2411 }, { "epoch": 0.6104783599088838, "grad_norm": 0.15625551342964172, "learning_rate": 1.8507801434912626e-05, "loss": 0.519, "step": 2412 }, { "epoch": 0.6107314603897748, "grad_norm": 0.15180808305740356, "learning_rate": 1.8506542921579732e-05, "loss": 0.5304, "step": 2413 }, { "epoch": 0.6109845608706657, "grad_norm": 0.14952440559864044, "learning_rate": 1.8505283920584773e-05, "loss": 0.5348, "step": 2414 }, { "epoch": 0.6112376613515565, "grad_norm": 0.139666348695755, "learning_rate": 1.850402443199993e-05, "loss": 0.5515, "step": 2415 }, { "epoch": 0.6114907618324474, "grad_norm": 0.14391599595546722, "learning_rate": 1.8502764455897398e-05, "loss": 0.5238, "step": 2416 }, { "epoch": 0.6117438623133384, "grad_norm": 0.1433994621038437, "learning_rate": 1.8501503992349426e-05, "loss": 0.5279, "step": 2417 }, { "epoch": 0.6119969627942293, "grad_norm": 0.14672143757343292, "learning_rate": 1.850024304142826e-05, "loss": 0.5533, "step": 2418 }, { "epoch": 0.6122500632751202, "grad_norm": 0.14460209012031555, "learning_rate": 1.849898160320619e-05, "loss": 0.5376, "step": 2419 }, { "epoch": 0.6125031637560111, "grad_norm": 0.146707683801651, "learning_rate": 1.8497719677755537e-05, "loss": 0.5413, "step": 2420 }, { "epoch": 0.6127562642369021, "grad_norm": 0.2081783264875412, "learning_rate": 1.849645726514864e-05, "loss": 0.5439, "step": 2421 }, { "epoch": 0.613009364717793, "grad_norm": 0.15196694433689117, "learning_rate": 1.8495194365457866e-05, "loss": 0.5475, "step": 2422 }, { "epoch": 0.6132624651986839, "grad_norm": 0.14226335287094116, "learning_rate": 1.8493930978755625e-05, "loss": 0.5598, "step": 2423 }, { "epoch": 0.6135155656795748, "grad_norm": 0.1489374339580536, "learning_rate": 1.8492667105114335e-05, "loss": 0.5462, "step": 2424 }, { "epoch": 0.6137686661604657, "grad_norm": 0.14674276113510132, "learning_rate": 1.8491402744606456e-05, "loss": 0.5588, "step": 2425 }, { "epoch": 0.6140217666413567, "grad_norm": 0.14677469432353973, "learning_rate": 1.8490137897304474e-05, "loss": 0.527, "step": 2426 }, { "epoch": 0.6142748671222475, "grad_norm": 0.15478141605854034, "learning_rate": 1.848887256328089e-05, "loss": 0.5391, "step": 2427 }, { "epoch": 0.6145279676031384, "grad_norm": 0.1517610400915146, "learning_rate": 1.848760674260825e-05, "loss": 0.5195, "step": 2428 }, { "epoch": 0.6147810680840293, "grad_norm": 0.1444411426782608, "learning_rate": 1.8486340435359125e-05, "loss": 0.53, "step": 2429 }, { "epoch": 0.6150341685649203, "grad_norm": 0.14189781248569489, "learning_rate": 1.84850736416061e-05, "loss": 0.5274, "step": 2430 }, { "epoch": 0.6152872690458112, "grad_norm": 0.14328473806381226, "learning_rate": 1.84838063614218e-05, "loss": 0.5269, "step": 2431 }, { "epoch": 0.6155403695267021, "grad_norm": 0.15161679685115814, "learning_rate": 1.848253859487888e-05, "loss": 0.5177, "step": 2432 }, { "epoch": 0.615793470007593, "grad_norm": 0.14411920309066772, "learning_rate": 1.848127034205002e-05, "loss": 0.5102, "step": 2433 }, { "epoch": 0.6160465704884839, "grad_norm": 0.1530870944261551, "learning_rate": 1.8480001603007914e-05, "loss": 0.5249, "step": 2434 }, { "epoch": 0.6162996709693749, "grad_norm": 0.1427738517522812, "learning_rate": 1.847873237782531e-05, "loss": 0.544, "step": 2435 }, { "epoch": 0.6165527714502658, "grad_norm": 0.14298276603221893, "learning_rate": 1.8477462666574963e-05, "loss": 0.5407, "step": 2436 }, { "epoch": 0.6168058719311567, "grad_norm": 0.16381780803203583, "learning_rate": 1.8476192469329663e-05, "loss": 0.536, "step": 2437 }, { "epoch": 0.6170589724120475, "grad_norm": 0.14814378321170807, "learning_rate": 1.847492178616223e-05, "loss": 0.5579, "step": 2438 }, { "epoch": 0.6173120728929385, "grad_norm": 0.14587941765785217, "learning_rate": 1.8473650617145507e-05, "loss": 0.5485, "step": 2439 }, { "epoch": 0.6175651733738294, "grad_norm": 0.20234888792037964, "learning_rate": 1.847237896235237e-05, "loss": 0.5435, "step": 2440 }, { "epoch": 0.6178182738547203, "grad_norm": 0.14384739100933075, "learning_rate": 1.847110682185572e-05, "loss": 0.5457, "step": 2441 }, { "epoch": 0.6180713743356112, "grad_norm": 0.14170606434345245, "learning_rate": 1.8469834195728484e-05, "loss": 0.5325, "step": 2442 }, { "epoch": 0.6183244748165021, "grad_norm": 0.14463554322719574, "learning_rate": 1.846856108404362e-05, "loss": 0.5357, "step": 2443 }, { "epoch": 0.6185775752973931, "grad_norm": 0.14453023672103882, "learning_rate": 1.8467287486874114e-05, "loss": 0.523, "step": 2444 }, { "epoch": 0.618830675778284, "grad_norm": 0.14751245081424713, "learning_rate": 1.8466013404292974e-05, "loss": 0.5157, "step": 2445 }, { "epoch": 0.6190837762591749, "grad_norm": 0.1481376588344574, "learning_rate": 1.8464738836373248e-05, "loss": 0.533, "step": 2446 }, { "epoch": 0.6193368767400658, "grad_norm": 0.15021999180316925, "learning_rate": 1.8463463783187997e-05, "loss": 0.5214, "step": 2447 }, { "epoch": 0.6195899772209568, "grad_norm": 0.1563500463962555, "learning_rate": 1.8462188244810324e-05, "loss": 0.5549, "step": 2448 }, { "epoch": 0.6198430777018477, "grad_norm": 0.14146524667739868, "learning_rate": 1.846091222131335e-05, "loss": 0.5374, "step": 2449 }, { "epoch": 0.6200961781827385, "grad_norm": 0.1462208777666092, "learning_rate": 1.845963571277022e-05, "loss": 0.5402, "step": 2450 }, { "epoch": 0.6203492786636294, "grad_norm": 0.14394448697566986, "learning_rate": 1.8458358719254125e-05, "loss": 0.5462, "step": 2451 }, { "epoch": 0.6206023791445203, "grad_norm": 0.14380502700805664, "learning_rate": 1.8457081240838265e-05, "loss": 0.5331, "step": 2452 }, { "epoch": 0.6208554796254113, "grad_norm": 0.13871550559997559, "learning_rate": 1.8455803277595877e-05, "loss": 0.5364, "step": 2453 }, { "epoch": 0.6211085801063022, "grad_norm": 0.16735586524009705, "learning_rate": 1.8454524829600223e-05, "loss": 0.566, "step": 2454 }, { "epoch": 0.6213616805871931, "grad_norm": 0.16703766584396362, "learning_rate": 1.8453245896924595e-05, "loss": 0.5477, "step": 2455 }, { "epoch": 0.621614781068084, "grad_norm": 0.1464633047580719, "learning_rate": 1.8451966479642312e-05, "loss": 0.5451, "step": 2456 }, { "epoch": 0.621867881548975, "grad_norm": 0.1467081904411316, "learning_rate": 1.845068657782672e-05, "loss": 0.5581, "step": 2457 }, { "epoch": 0.6221209820298659, "grad_norm": 0.14057934284210205, "learning_rate": 1.844940619155119e-05, "loss": 0.5022, "step": 2458 }, { "epoch": 0.6223740825107568, "grad_norm": 0.14661408960819244, "learning_rate": 1.8448125320889127e-05, "loss": 0.5311, "step": 2459 }, { "epoch": 0.6226271829916477, "grad_norm": 0.14641529321670532, "learning_rate": 1.8446843965913962e-05, "loss": 0.5181, "step": 2460 }, { "epoch": 0.6228802834725385, "grad_norm": 0.14614209532737732, "learning_rate": 1.844556212669915e-05, "loss": 0.5363, "step": 2461 }, { "epoch": 0.6231333839534295, "grad_norm": 0.14240095019340515, "learning_rate": 1.844427980331817e-05, "loss": 0.5336, "step": 2462 }, { "epoch": 0.6233864844343204, "grad_norm": 0.14581473171710968, "learning_rate": 1.844299699584455e-05, "loss": 0.5273, "step": 2463 }, { "epoch": 0.6236395849152113, "grad_norm": 0.14479555189609528, "learning_rate": 1.8441713704351815e-05, "loss": 0.5366, "step": 2464 }, { "epoch": 0.6238926853961022, "grad_norm": 0.14247867465019226, "learning_rate": 1.8440429928913545e-05, "loss": 0.5473, "step": 2465 }, { "epoch": 0.6241457858769932, "grad_norm": 0.1444939225912094, "learning_rate": 1.843914566960333e-05, "loss": 0.5127, "step": 2466 }, { "epoch": 0.6243988863578841, "grad_norm": 0.14626888930797577, "learning_rate": 1.8437860926494794e-05, "loss": 0.5229, "step": 2467 }, { "epoch": 0.624651986838775, "grad_norm": 0.15107285976409912, "learning_rate": 1.8436575699661592e-05, "loss": 0.529, "step": 2468 }, { "epoch": 0.6249050873196659, "grad_norm": 0.1470448076725006, "learning_rate": 1.84352899891774e-05, "loss": 0.5315, "step": 2469 }, { "epoch": 0.6251581878005569, "grad_norm": 0.1635686606168747, "learning_rate": 1.8434003795115928e-05, "loss": 0.532, "step": 2470 }, { "epoch": 0.6254112882814478, "grad_norm": 0.15282386541366577, "learning_rate": 1.8432717117550908e-05, "loss": 0.5301, "step": 2471 }, { "epoch": 0.6256643887623387, "grad_norm": 0.14698238670825958, "learning_rate": 1.84314299565561e-05, "loss": 0.5389, "step": 2472 }, { "epoch": 0.6259174892432295, "grad_norm": 0.14671845734119415, "learning_rate": 1.8430142312205302e-05, "loss": 0.5372, "step": 2473 }, { "epoch": 0.6261705897241204, "grad_norm": 0.14897477626800537, "learning_rate": 1.8428854184572328e-05, "loss": 0.5462, "step": 2474 }, { "epoch": 0.6264236902050114, "grad_norm": 0.15275777876377106, "learning_rate": 1.842756557373102e-05, "loss": 0.5164, "step": 2475 }, { "epoch": 0.6266767906859023, "grad_norm": 0.3233988583087921, "learning_rate": 1.8426276479755258e-05, "loss": 0.5316, "step": 2476 }, { "epoch": 0.6269298911667932, "grad_norm": 0.18216943740844727, "learning_rate": 1.8424986902718935e-05, "loss": 0.5207, "step": 2477 }, { "epoch": 0.6271829916476841, "grad_norm": 0.14639009535312653, "learning_rate": 1.842369684269599e-05, "loss": 0.5525, "step": 2478 }, { "epoch": 0.6274360921285751, "grad_norm": 0.1494276374578476, "learning_rate": 1.842240629976037e-05, "loss": 0.5458, "step": 2479 }, { "epoch": 0.627689192609466, "grad_norm": 0.14708733558654785, "learning_rate": 1.8421115273986064e-05, "loss": 0.5474, "step": 2480 }, { "epoch": 0.6279422930903569, "grad_norm": 0.14327023923397064, "learning_rate": 1.8419823765447082e-05, "loss": 0.5489, "step": 2481 }, { "epoch": 0.6281953935712478, "grad_norm": 0.14275063574314117, "learning_rate": 1.8418531774217463e-05, "loss": 0.5303, "step": 2482 }, { "epoch": 0.6284484940521387, "grad_norm": 0.14552482962608337, "learning_rate": 1.8417239300371273e-05, "loss": 0.5427, "step": 2483 }, { "epoch": 0.6287015945330297, "grad_norm": 0.14963699877262115, "learning_rate": 1.8415946343982614e-05, "loss": 0.5655, "step": 2484 }, { "epoch": 0.6289546950139205, "grad_norm": 0.14270426332950592, "learning_rate": 1.84146529051256e-05, "loss": 0.5368, "step": 2485 }, { "epoch": 0.6292077954948114, "grad_norm": 0.15036006271839142, "learning_rate": 1.8413358983874387e-05, "loss": 0.5523, "step": 2486 }, { "epoch": 0.6294608959757023, "grad_norm": 0.1527857482433319, "learning_rate": 1.841206458030315e-05, "loss": 0.5378, "step": 2487 }, { "epoch": 0.6297139964565933, "grad_norm": 0.14708644151687622, "learning_rate": 1.8410769694486093e-05, "loss": 0.5349, "step": 2488 }, { "epoch": 0.6299670969374842, "grad_norm": 0.1494288295507431, "learning_rate": 1.8409474326497455e-05, "loss": 0.5604, "step": 2489 }, { "epoch": 0.6302201974183751, "grad_norm": 0.14631909132003784, "learning_rate": 1.8408178476411487e-05, "loss": 0.5081, "step": 2490 }, { "epoch": 0.630473297899266, "grad_norm": 0.15233495831489563, "learning_rate": 1.8406882144302483e-05, "loss": 0.5179, "step": 2491 }, { "epoch": 0.6307263983801569, "grad_norm": 0.14197777211666107, "learning_rate": 1.840558533024476e-05, "loss": 0.5134, "step": 2492 }, { "epoch": 0.6309794988610479, "grad_norm": 0.14182843267917633, "learning_rate": 1.8404288034312664e-05, "loss": 0.525, "step": 2493 }, { "epoch": 0.6312325993419388, "grad_norm": 0.15093275904655457, "learning_rate": 1.8402990256580556e-05, "loss": 0.5374, "step": 2494 }, { "epoch": 0.6314856998228296, "grad_norm": 0.14455483853816986, "learning_rate": 1.8401691997122844e-05, "loss": 0.5121, "step": 2495 }, { "epoch": 0.6317388003037205, "grad_norm": 0.151828333735466, "learning_rate": 1.8400393256013955e-05, "loss": 0.5319, "step": 2496 }, { "epoch": 0.6319919007846115, "grad_norm": 0.14533530175685883, "learning_rate": 1.8399094033328336e-05, "loss": 0.5333, "step": 2497 }, { "epoch": 0.6322450012655024, "grad_norm": 0.1414755880832672, "learning_rate": 1.8397794329140475e-05, "loss": 0.5117, "step": 2498 }, { "epoch": 0.6324981017463933, "grad_norm": 0.1505860835313797, "learning_rate": 1.8396494143524877e-05, "loss": 0.5003, "step": 2499 }, { "epoch": 0.6327512022272842, "grad_norm": 0.14254526793956757, "learning_rate": 1.8395193476556085e-05, "loss": 0.5017, "step": 2500 }, { "epoch": 0.6330043027081751, "grad_norm": 0.14562487602233887, "learning_rate": 1.8393892328308654e-05, "loss": 0.5563, "step": 2501 }, { "epoch": 0.6332574031890661, "grad_norm": 0.14674434065818787, "learning_rate": 1.8392590698857184e-05, "loss": 0.5054, "step": 2502 }, { "epoch": 0.633510503669957, "grad_norm": 0.1408618837594986, "learning_rate": 1.839128858827629e-05, "loss": 0.5065, "step": 2503 }, { "epoch": 0.6337636041508479, "grad_norm": 0.14369329810142517, "learning_rate": 1.8389985996640623e-05, "loss": 0.5329, "step": 2504 }, { "epoch": 0.6340167046317388, "grad_norm": 0.14057013392448425, "learning_rate": 1.8388682924024854e-05, "loss": 0.5195, "step": 2505 }, { "epoch": 0.6342698051126298, "grad_norm": 0.1421726793050766, "learning_rate": 1.8387379370503692e-05, "loss": 0.5104, "step": 2506 }, { "epoch": 0.6345229055935206, "grad_norm": 0.14999838173389435, "learning_rate": 1.8386075336151857e-05, "loss": 0.5628, "step": 2507 }, { "epoch": 0.6347760060744115, "grad_norm": 0.15406276285648346, "learning_rate": 1.8384770821044115e-05, "loss": 0.5568, "step": 2508 }, { "epoch": 0.6350291065553024, "grad_norm": 0.14896829426288605, "learning_rate": 1.8383465825255244e-05, "loss": 0.552, "step": 2509 }, { "epoch": 0.6352822070361933, "grad_norm": 0.1516672670841217, "learning_rate": 1.8382160348860063e-05, "loss": 0.5199, "step": 2510 }, { "epoch": 0.6355353075170843, "grad_norm": 0.1618236005306244, "learning_rate": 1.8380854391933413e-05, "loss": 0.5039, "step": 2511 }, { "epoch": 0.6357884079979752, "grad_norm": 0.14674592018127441, "learning_rate": 1.8379547954550157e-05, "loss": 0.544, "step": 2512 }, { "epoch": 0.6360415084788661, "grad_norm": 0.14451873302459717, "learning_rate": 1.8378241036785186e-05, "loss": 0.5554, "step": 2513 }, { "epoch": 0.636294608959757, "grad_norm": 0.14248131215572357, "learning_rate": 1.8376933638713438e-05, "loss": 0.5241, "step": 2514 }, { "epoch": 0.636547709440648, "grad_norm": 0.1481953114271164, "learning_rate": 1.8375625760409848e-05, "loss": 0.5685, "step": 2515 }, { "epoch": 0.6368008099215389, "grad_norm": 0.15451690554618835, "learning_rate": 1.8374317401949403e-05, "loss": 0.5293, "step": 2516 }, { "epoch": 0.6370539104024298, "grad_norm": 0.14824581146240234, "learning_rate": 1.83730085634071e-05, "loss": 0.5394, "step": 2517 }, { "epoch": 0.6373070108833206, "grad_norm": 0.1481318473815918, "learning_rate": 1.837169924485798e-05, "loss": 0.5484, "step": 2518 }, { "epoch": 0.6375601113642116, "grad_norm": 0.14752435684204102, "learning_rate": 1.8370389446377103e-05, "loss": 0.5054, "step": 2519 }, { "epoch": 0.6378132118451025, "grad_norm": 0.143208846449852, "learning_rate": 1.8369079168039555e-05, "loss": 0.5242, "step": 2520 }, { "epoch": 0.6380663123259934, "grad_norm": 0.14739929139614105, "learning_rate": 1.836776840992045e-05, "loss": 0.5317, "step": 2521 }, { "epoch": 0.6383194128068843, "grad_norm": 0.14670588076114655, "learning_rate": 1.836645717209493e-05, "loss": 0.5345, "step": 2522 }, { "epoch": 0.6385725132877752, "grad_norm": 0.1452939361333847, "learning_rate": 1.836514545463817e-05, "loss": 0.5427, "step": 2523 }, { "epoch": 0.6388256137686662, "grad_norm": 0.14539825916290283, "learning_rate": 1.8363833257625365e-05, "loss": 0.5147, "step": 2524 }, { "epoch": 0.6390787142495571, "grad_norm": 0.1460723876953125, "learning_rate": 1.8362520581131738e-05, "loss": 0.5728, "step": 2525 }, { "epoch": 0.639331814730448, "grad_norm": 0.1652328222990036, "learning_rate": 1.8361207425232548e-05, "loss": 0.5341, "step": 2526 }, { "epoch": 0.6395849152113389, "grad_norm": 0.14856599271297455, "learning_rate": 1.8359893790003074e-05, "loss": 0.5075, "step": 2527 }, { "epoch": 0.6398380156922299, "grad_norm": 0.16230234503746033, "learning_rate": 1.835857967551862e-05, "loss": 0.5655, "step": 2528 }, { "epoch": 0.6400911161731208, "grad_norm": 0.1495852917432785, "learning_rate": 1.8357265081854522e-05, "loss": 0.5381, "step": 2529 }, { "epoch": 0.6403442166540116, "grad_norm": 0.1538114994764328, "learning_rate": 1.8355950009086148e-05, "loss": 0.5569, "step": 2530 }, { "epoch": 0.6405973171349025, "grad_norm": 0.15153038501739502, "learning_rate": 1.8354634457288885e-05, "loss": 0.534, "step": 2531 }, { "epoch": 0.6408504176157934, "grad_norm": 0.15110176801681519, "learning_rate": 1.835331842653815e-05, "loss": 0.5507, "step": 2532 }, { "epoch": 0.6411035180966844, "grad_norm": 0.1493983119726181, "learning_rate": 1.8352001916909387e-05, "loss": 0.5493, "step": 2533 }, { "epoch": 0.6413566185775753, "grad_norm": 0.15783622860908508, "learning_rate": 1.8350684928478072e-05, "loss": 0.5666, "step": 2534 }, { "epoch": 0.6416097190584662, "grad_norm": 0.14776931703090668, "learning_rate": 1.8349367461319704e-05, "loss": 0.5761, "step": 2535 }, { "epoch": 0.6418628195393571, "grad_norm": 0.15445038676261902, "learning_rate": 1.8348049515509814e-05, "loss": 0.5574, "step": 2536 }, { "epoch": 0.6421159200202481, "grad_norm": 0.15116912126541138, "learning_rate": 1.834673109112395e-05, "loss": 0.5492, "step": 2537 }, { "epoch": 0.642369020501139, "grad_norm": 0.14761857688426971, "learning_rate": 1.83454121882377e-05, "loss": 0.5171, "step": 2538 }, { "epoch": 0.6426221209820299, "grad_norm": 0.14666865766048431, "learning_rate": 1.834409280692667e-05, "loss": 0.5354, "step": 2539 }, { "epoch": 0.6428752214629208, "grad_norm": 0.14642728865146637, "learning_rate": 1.83427729472665e-05, "loss": 0.5374, "step": 2540 }, { "epoch": 0.6431283219438116, "grad_norm": 0.1411811113357544, "learning_rate": 1.8341452609332852e-05, "loss": 0.5296, "step": 2541 }, { "epoch": 0.6433814224247026, "grad_norm": 0.15042644739151, "learning_rate": 1.834013179320142e-05, "loss": 0.5422, "step": 2542 }, { "epoch": 0.6436345229055935, "grad_norm": 0.15062500536441803, "learning_rate": 1.8338810498947928e-05, "loss": 0.5535, "step": 2543 }, { "epoch": 0.6438876233864844, "grad_norm": 0.14488181471824646, "learning_rate": 1.8337488726648118e-05, "loss": 0.5395, "step": 2544 }, { "epoch": 0.6441407238673753, "grad_norm": 0.1423717439174652, "learning_rate": 1.8336166476377763e-05, "loss": 0.522, "step": 2545 }, { "epoch": 0.6443938243482663, "grad_norm": 0.1495235413312912, "learning_rate": 1.8334843748212666e-05, "loss": 0.5149, "step": 2546 }, { "epoch": 0.6446469248291572, "grad_norm": 0.14710858464241028, "learning_rate": 1.8333520542228657e-05, "loss": 0.5424, "step": 2547 }, { "epoch": 0.6449000253100481, "grad_norm": 0.16457505524158478, "learning_rate": 1.8332196858501593e-05, "loss": 0.5536, "step": 2548 }, { "epoch": 0.645153125790939, "grad_norm": 0.14198842644691467, "learning_rate": 1.833087269710736e-05, "loss": 0.5159, "step": 2549 }, { "epoch": 0.6454062262718299, "grad_norm": 0.15280668437480927, "learning_rate": 1.8329548058121865e-05, "loss": 0.5615, "step": 2550 }, { "epoch": 0.6456593267527209, "grad_norm": 0.14535677433013916, "learning_rate": 1.832822294162105e-05, "loss": 0.5491, "step": 2551 }, { "epoch": 0.6459124272336118, "grad_norm": 0.14739260077476501, "learning_rate": 1.832689734768088e-05, "loss": 0.536, "step": 2552 }, { "epoch": 0.6461655277145026, "grad_norm": 0.14564937353134155, "learning_rate": 1.8325571276377347e-05, "loss": 0.5162, "step": 2553 }, { "epoch": 0.6464186281953935, "grad_norm": 0.14750128984451294, "learning_rate": 1.8324244727786477e-05, "loss": 0.544, "step": 2554 }, { "epoch": 0.6466717286762845, "grad_norm": 0.15399467945098877, "learning_rate": 1.832291770198431e-05, "loss": 0.5167, "step": 2555 }, { "epoch": 0.6469248291571754, "grad_norm": 0.14649620652198792, "learning_rate": 1.832159019904693e-05, "loss": 0.5685, "step": 2556 }, { "epoch": 0.6471779296380663, "grad_norm": 0.14589351415634155, "learning_rate": 1.8320262219050432e-05, "loss": 0.5276, "step": 2557 }, { "epoch": 0.6474310301189572, "grad_norm": 0.14570419490337372, "learning_rate": 1.8318933762070954e-05, "loss": 0.5218, "step": 2558 }, { "epoch": 0.6476841305998481, "grad_norm": 0.14705124497413635, "learning_rate": 1.8317604828184648e-05, "loss": 0.5315, "step": 2559 }, { "epoch": 0.6479372310807391, "grad_norm": 0.14263677597045898, "learning_rate": 1.83162754174677e-05, "loss": 0.5315, "step": 2560 }, { "epoch": 0.64819033156163, "grad_norm": 0.1443737894296646, "learning_rate": 1.8314945529996328e-05, "loss": 0.5539, "step": 2561 }, { "epoch": 0.6484434320425209, "grad_norm": 0.14575058221817017, "learning_rate": 1.831361516584676e-05, "loss": 0.5229, "step": 2562 }, { "epoch": 0.6486965325234118, "grad_norm": 0.15036718547344208, "learning_rate": 1.8312284325095274e-05, "loss": 0.5123, "step": 2563 }, { "epoch": 0.6489496330043028, "grad_norm": 0.1482842117547989, "learning_rate": 1.8310953007818158e-05, "loss": 0.5636, "step": 2564 }, { "epoch": 0.6492027334851936, "grad_norm": 0.1420038640499115, "learning_rate": 1.8309621214091734e-05, "loss": 0.5231, "step": 2565 }, { "epoch": 0.6494558339660845, "grad_norm": 0.1481584757566452, "learning_rate": 1.8308288943992357e-05, "loss": 0.5328, "step": 2566 }, { "epoch": 0.6497089344469754, "grad_norm": 0.14541727304458618, "learning_rate": 1.8306956197596397e-05, "loss": 0.5228, "step": 2567 }, { "epoch": 0.6499620349278664, "grad_norm": 0.14279106259346008, "learning_rate": 1.830562297498026e-05, "loss": 0.54, "step": 2568 }, { "epoch": 0.6502151354087573, "grad_norm": 0.1472083181142807, "learning_rate": 1.8304289276220375e-05, "loss": 0.5329, "step": 2569 }, { "epoch": 0.6504682358896482, "grad_norm": 0.15057985484600067, "learning_rate": 1.83029551013932e-05, "loss": 0.5335, "step": 2570 }, { "epoch": 0.6507213363705391, "grad_norm": 0.3025047481060028, "learning_rate": 1.8301620450575225e-05, "loss": 0.5398, "step": 2571 }, { "epoch": 0.65097443685143, "grad_norm": 0.14984352886676788, "learning_rate": 1.8300285323842953e-05, "loss": 0.5528, "step": 2572 }, { "epoch": 0.651227537332321, "grad_norm": 0.14657318592071533, "learning_rate": 1.8298949721272935e-05, "loss": 0.546, "step": 2573 }, { "epoch": 0.6514806378132119, "grad_norm": 0.14619691669940948, "learning_rate": 1.8297613642941736e-05, "loss": 0.5644, "step": 2574 }, { "epoch": 0.6517337382941027, "grad_norm": 0.14889754354953766, "learning_rate": 1.8296277088925944e-05, "loss": 0.529, "step": 2575 }, { "epoch": 0.6519868387749936, "grad_norm": 0.14526212215423584, "learning_rate": 1.8294940059302185e-05, "loss": 0.5276, "step": 2576 }, { "epoch": 0.6522399392558846, "grad_norm": 0.14411452412605286, "learning_rate": 1.829360255414711e-05, "loss": 0.5287, "step": 2577 }, { "epoch": 0.6524930397367755, "grad_norm": 0.14212995767593384, "learning_rate": 1.8292264573537392e-05, "loss": 0.5244, "step": 2578 }, { "epoch": 0.6527461402176664, "grad_norm": 0.15822291374206543, "learning_rate": 1.8290926117549737e-05, "loss": 0.5323, "step": 2579 }, { "epoch": 0.6529992406985573, "grad_norm": 0.14641554653644562, "learning_rate": 1.8289587186260874e-05, "loss": 0.5468, "step": 2580 }, { "epoch": 0.6532523411794482, "grad_norm": 0.14154264330863953, "learning_rate": 1.8288247779747564e-05, "loss": 0.5034, "step": 2581 }, { "epoch": 0.6535054416603392, "grad_norm": 0.15055370330810547, "learning_rate": 1.8286907898086586e-05, "loss": 0.5511, "step": 2582 }, { "epoch": 0.6537585421412301, "grad_norm": 0.14376474916934967, "learning_rate": 1.828556754135476e-05, "loss": 0.5399, "step": 2583 }, { "epoch": 0.654011642622121, "grad_norm": 0.1468782275915146, "learning_rate": 1.8284226709628917e-05, "loss": 0.5159, "step": 2584 }, { "epoch": 0.6542647431030119, "grad_norm": 0.14444656670093536, "learning_rate": 1.8282885402985936e-05, "loss": 0.5384, "step": 2585 }, { "epoch": 0.6545178435839029, "grad_norm": 0.14160342514514923, "learning_rate": 1.8281543621502706e-05, "loss": 0.5061, "step": 2586 }, { "epoch": 0.6547709440647937, "grad_norm": 0.15068010985851288, "learning_rate": 1.828020136525614e-05, "loss": 0.5265, "step": 2587 }, { "epoch": 0.6550240445456846, "grad_norm": 0.14576853811740875, "learning_rate": 1.82788586343232e-05, "loss": 0.5338, "step": 2588 }, { "epoch": 0.6552771450265755, "grad_norm": 0.1483764946460724, "learning_rate": 1.8277515428780855e-05, "loss": 0.549, "step": 2589 }, { "epoch": 0.6555302455074664, "grad_norm": 0.14455454051494598, "learning_rate": 1.8276171748706107e-05, "loss": 0.54, "step": 2590 }, { "epoch": 0.6557833459883574, "grad_norm": 0.1460620015859604, "learning_rate": 1.827482759417599e-05, "loss": 0.5433, "step": 2591 }, { "epoch": 0.6560364464692483, "grad_norm": 0.14440762996673584, "learning_rate": 1.827348296526756e-05, "loss": 0.5548, "step": 2592 }, { "epoch": 0.6562895469501392, "grad_norm": 0.1437801718711853, "learning_rate": 1.82721378620579e-05, "loss": 0.5473, "step": 2593 }, { "epoch": 0.6565426474310301, "grad_norm": 0.18944020569324493, "learning_rate": 1.8270792284624127e-05, "loss": 0.5425, "step": 2594 }, { "epoch": 0.6567957479119211, "grad_norm": 0.1519591063261032, "learning_rate": 1.8269446233043373e-05, "loss": 0.5229, "step": 2595 }, { "epoch": 0.657048848392812, "grad_norm": 0.15800318121910095, "learning_rate": 1.826809970739281e-05, "loss": 0.5294, "step": 2596 }, { "epoch": 0.6573019488737029, "grad_norm": 0.1494808793067932, "learning_rate": 1.826675270774963e-05, "loss": 0.5266, "step": 2597 }, { "epoch": 0.6575550493545937, "grad_norm": 0.151277557015419, "learning_rate": 1.826540523419105e-05, "loss": 0.5543, "step": 2598 }, { "epoch": 0.6578081498354846, "grad_norm": 0.14490482211112976, "learning_rate": 1.8264057286794323e-05, "loss": 0.5365, "step": 2599 }, { "epoch": 0.6580612503163756, "grad_norm": 0.15144558250904083, "learning_rate": 1.8262708865636724e-05, "loss": 0.5418, "step": 2600 }, { "epoch": 0.6583143507972665, "grad_norm": 0.14295633137226105, "learning_rate": 1.826135997079555e-05, "loss": 0.5208, "step": 2601 }, { "epoch": 0.6585674512781574, "grad_norm": 0.1654975265264511, "learning_rate": 1.8260010602348136e-05, "loss": 0.5612, "step": 2602 }, { "epoch": 0.6588205517590483, "grad_norm": 0.1482984572649002, "learning_rate": 1.825866076037183e-05, "loss": 0.5505, "step": 2603 }, { "epoch": 0.6590736522399393, "grad_norm": 0.34943100810050964, "learning_rate": 1.8257310444944028e-05, "loss": 0.5318, "step": 2604 }, { "epoch": 0.6593267527208302, "grad_norm": 0.14697709679603577, "learning_rate": 1.8255959656142127e-05, "loss": 0.5271, "step": 2605 }, { "epoch": 0.6595798532017211, "grad_norm": 0.15266238152980804, "learning_rate": 1.8254608394043578e-05, "loss": 0.5424, "step": 2606 }, { "epoch": 0.659832953682612, "grad_norm": 0.15145014226436615, "learning_rate": 1.8253256658725837e-05, "loss": 0.5597, "step": 2607 }, { "epoch": 0.6600860541635029, "grad_norm": 0.14370834827423096, "learning_rate": 1.82519044502664e-05, "loss": 0.5167, "step": 2608 }, { "epoch": 0.6603391546443939, "grad_norm": 0.1422983705997467, "learning_rate": 1.8250551768742783e-05, "loss": 0.522, "step": 2609 }, { "epoch": 0.6605922551252847, "grad_norm": 0.14248280227184296, "learning_rate": 1.8249198614232538e-05, "loss": 0.5082, "step": 2610 }, { "epoch": 0.6608453556061756, "grad_norm": 0.14176413416862488, "learning_rate": 1.8247844986813233e-05, "loss": 0.5422, "step": 2611 }, { "epoch": 0.6610984560870665, "grad_norm": 0.14454533159732819, "learning_rate": 1.824649088656247e-05, "loss": 0.534, "step": 2612 }, { "epoch": 0.6613515565679575, "grad_norm": 0.15026485919952393, "learning_rate": 1.8245136313557876e-05, "loss": 0.548, "step": 2613 }, { "epoch": 0.6616046570488484, "grad_norm": 0.1425452083349228, "learning_rate": 1.8243781267877108e-05, "loss": 0.5117, "step": 2614 }, { "epoch": 0.6618577575297393, "grad_norm": 0.14487503468990326, "learning_rate": 1.824242574959785e-05, "loss": 0.5207, "step": 2615 }, { "epoch": 0.6621108580106302, "grad_norm": 0.28814759850502014, "learning_rate": 1.8241069758797805e-05, "loss": 0.5417, "step": 2616 }, { "epoch": 0.6623639584915212, "grad_norm": 0.1512315720319748, "learning_rate": 1.823971329555471e-05, "loss": 0.5347, "step": 2617 }, { "epoch": 0.6626170589724121, "grad_norm": 0.1426512598991394, "learning_rate": 1.8238356359946337e-05, "loss": 0.5465, "step": 2618 }, { "epoch": 0.662870159453303, "grad_norm": 0.1508060097694397, "learning_rate": 1.8236998952050467e-05, "loss": 0.5312, "step": 2619 }, { "epoch": 0.6631232599341939, "grad_norm": 0.16253367066383362, "learning_rate": 1.8235641071944916e-05, "loss": 0.5339, "step": 2620 }, { "epoch": 0.6633763604150847, "grad_norm": 0.1454850435256958, "learning_rate": 1.8234282719707534e-05, "loss": 0.5083, "step": 2621 }, { "epoch": 0.6636294608959757, "grad_norm": 0.14943727850914001, "learning_rate": 1.823292389541619e-05, "loss": 0.5344, "step": 2622 }, { "epoch": 0.6638825613768666, "grad_norm": 0.15019634366035461, "learning_rate": 1.8231564599148787e-05, "loss": 0.545, "step": 2623 }, { "epoch": 0.6641356618577575, "grad_norm": 0.15300233662128448, "learning_rate": 1.8230204830983243e-05, "loss": 0.53, "step": 2624 }, { "epoch": 0.6643887623386484, "grad_norm": 0.14591997861862183, "learning_rate": 1.8228844590997514e-05, "loss": 0.5388, "step": 2625 }, { "epoch": 0.6646418628195394, "grad_norm": 0.1456891894340515, "learning_rate": 1.822748387926958e-05, "loss": 0.5484, "step": 2626 }, { "epoch": 0.6648949633004303, "grad_norm": 0.1455857753753662, "learning_rate": 1.822612269587745e-05, "loss": 0.5297, "step": 2627 }, { "epoch": 0.6651480637813212, "grad_norm": 0.14482049643993378, "learning_rate": 1.8224761040899154e-05, "loss": 0.5385, "step": 2628 }, { "epoch": 0.6654011642622121, "grad_norm": 0.14514224231243134, "learning_rate": 1.8223398914412755e-05, "loss": 0.5399, "step": 2629 }, { "epoch": 0.665654264743103, "grad_norm": 0.14818979799747467, "learning_rate": 1.8222036316496342e-05, "loss": 0.521, "step": 2630 }, { "epoch": 0.665907365223994, "grad_norm": 0.14780977368354797, "learning_rate": 1.8220673247228022e-05, "loss": 0.5621, "step": 2631 }, { "epoch": 0.6661604657048849, "grad_norm": 0.14855843782424927, "learning_rate": 1.821930970668595e-05, "loss": 0.5312, "step": 2632 }, { "epoch": 0.6664135661857757, "grad_norm": 0.14819836616516113, "learning_rate": 1.821794569494828e-05, "loss": 0.495, "step": 2633 }, { "epoch": 0.6666666666666666, "grad_norm": 0.1473757028579712, "learning_rate": 1.821658121209322e-05, "loss": 0.5421, "step": 2634 }, { "epoch": 0.6669197671475576, "grad_norm": 0.14525184035301208, "learning_rate": 1.821521625819899e-05, "loss": 0.5207, "step": 2635 }, { "epoch": 0.6671728676284485, "grad_norm": 0.14685095846652985, "learning_rate": 1.8213850833343836e-05, "loss": 0.5216, "step": 2636 }, { "epoch": 0.6674259681093394, "grad_norm": 0.14816656708717346, "learning_rate": 1.821248493760604e-05, "loss": 0.5292, "step": 2637 }, { "epoch": 0.6676790685902303, "grad_norm": 0.14218780398368835, "learning_rate": 1.8211118571063896e-05, "loss": 0.5333, "step": 2638 }, { "epoch": 0.6679321690711212, "grad_norm": 0.15113654732704163, "learning_rate": 1.820975173379575e-05, "loss": 0.5349, "step": 2639 }, { "epoch": 0.6681852695520122, "grad_norm": 0.1410970240831375, "learning_rate": 1.820838442587995e-05, "loss": 0.5299, "step": 2640 }, { "epoch": 0.6684383700329031, "grad_norm": 0.1561165153980255, "learning_rate": 1.8207016647394882e-05, "loss": 0.5349, "step": 2641 }, { "epoch": 0.668691470513794, "grad_norm": 0.15471969544887543, "learning_rate": 1.8205648398418957e-05, "loss": 0.5291, "step": 2642 }, { "epoch": 0.6689445709946849, "grad_norm": 0.1493500918149948, "learning_rate": 1.8204279679030617e-05, "loss": 0.5074, "step": 2643 }, { "epoch": 0.6691976714755759, "grad_norm": 0.14829988777637482, "learning_rate": 1.8202910489308327e-05, "loss": 0.4893, "step": 2644 }, { "epoch": 0.6694507719564667, "grad_norm": 0.14577631652355194, "learning_rate": 1.820154082933058e-05, "loss": 0.5165, "step": 2645 }, { "epoch": 0.6697038724373576, "grad_norm": 0.1467095911502838, "learning_rate": 1.820017069917589e-05, "loss": 0.5523, "step": 2646 }, { "epoch": 0.6699569729182485, "grad_norm": 0.1475784182548523, "learning_rate": 1.8198800098922812e-05, "loss": 0.5437, "step": 2647 }, { "epoch": 0.6702100733991394, "grad_norm": 0.14039447903633118, "learning_rate": 1.8197429028649916e-05, "loss": 0.5496, "step": 2648 }, { "epoch": 0.6704631738800304, "grad_norm": 0.14430488646030426, "learning_rate": 1.8196057488435802e-05, "loss": 0.5046, "step": 2649 }, { "epoch": 0.6707162743609213, "grad_norm": 0.15438096225261688, "learning_rate": 1.81946854783591e-05, "loss": 0.557, "step": 2650 }, { "epoch": 0.6709693748418122, "grad_norm": 0.1537015587091446, "learning_rate": 1.8193312998498458e-05, "loss": 0.5419, "step": 2651 }, { "epoch": 0.6712224753227031, "grad_norm": 0.1503497213125229, "learning_rate": 1.8191940048932565e-05, "loss": 0.5565, "step": 2652 }, { "epoch": 0.6714755758035941, "grad_norm": 0.14642778038978577, "learning_rate": 1.8190566629740125e-05, "loss": 0.5475, "step": 2653 }, { "epoch": 0.671728676284485, "grad_norm": 0.14955230057239532, "learning_rate": 1.8189192740999876e-05, "loss": 0.5181, "step": 2654 }, { "epoch": 0.6719817767653758, "grad_norm": 0.1473504602909088, "learning_rate": 1.8187818382790575e-05, "loss": 0.5528, "step": 2655 }, { "epoch": 0.6722348772462667, "grad_norm": 0.15076345205307007, "learning_rate": 1.818644355519102e-05, "loss": 0.5451, "step": 2656 }, { "epoch": 0.6724879777271576, "grad_norm": 0.14599618315696716, "learning_rate": 1.8185068258280013e-05, "loss": 0.5012, "step": 2657 }, { "epoch": 0.6727410782080486, "grad_norm": 0.14549905061721802, "learning_rate": 1.818369249213641e-05, "loss": 0.5373, "step": 2658 }, { "epoch": 0.6729941786889395, "grad_norm": 0.146087646484375, "learning_rate": 1.818231625683908e-05, "loss": 0.5227, "step": 2659 }, { "epoch": 0.6732472791698304, "grad_norm": 0.18517613410949707, "learning_rate": 1.818093955246691e-05, "loss": 0.5253, "step": 2660 }, { "epoch": 0.6735003796507213, "grad_norm": 0.15097062289714813, "learning_rate": 1.817956237909883e-05, "loss": 0.5283, "step": 2661 }, { "epoch": 0.6737534801316123, "grad_norm": 0.14378787577152252, "learning_rate": 1.8178184736813792e-05, "loss": 0.506, "step": 2662 }, { "epoch": 0.6740065806125032, "grad_norm": 0.14214852452278137, "learning_rate": 1.8176806625690768e-05, "loss": 0.5049, "step": 2663 }, { "epoch": 0.6742596810933941, "grad_norm": 0.1420966237783432, "learning_rate": 1.8175428045808767e-05, "loss": 0.5188, "step": 2664 }, { "epoch": 0.674512781574285, "grad_norm": 0.14496557414531708, "learning_rate": 1.8174048997246818e-05, "loss": 0.518, "step": 2665 }, { "epoch": 0.674765882055176, "grad_norm": 0.14519834518432617, "learning_rate": 1.8172669480083978e-05, "loss": 0.5376, "step": 2666 }, { "epoch": 0.6750189825360668, "grad_norm": 0.15130527317523956, "learning_rate": 1.8171289494399335e-05, "loss": 0.533, "step": 2667 }, { "epoch": 0.6752720830169577, "grad_norm": 0.15248116850852966, "learning_rate": 1.8169909040271997e-05, "loss": 0.5196, "step": 2668 }, { "epoch": 0.6755251834978486, "grad_norm": 0.1468237340450287, "learning_rate": 1.8168528117781104e-05, "loss": 0.5035, "step": 2669 }, { "epoch": 0.6757782839787395, "grad_norm": 0.1470155268907547, "learning_rate": 1.8167146727005824e-05, "loss": 0.5208, "step": 2670 }, { "epoch": 0.6760313844596305, "grad_norm": 0.14594605565071106, "learning_rate": 1.8165764868025344e-05, "loss": 0.5228, "step": 2671 }, { "epoch": 0.6762844849405214, "grad_norm": 0.14652718603610992, "learning_rate": 1.816438254091889e-05, "loss": 0.547, "step": 2672 }, { "epoch": 0.6765375854214123, "grad_norm": 0.14855660498142242, "learning_rate": 1.8162999745765696e-05, "loss": 0.5525, "step": 2673 }, { "epoch": 0.6767906859023032, "grad_norm": 0.14703433215618134, "learning_rate": 1.8161616482645048e-05, "loss": 0.5521, "step": 2674 }, { "epoch": 0.6770437863831942, "grad_norm": 0.15182270109653473, "learning_rate": 1.816023275163624e-05, "loss": 0.5567, "step": 2675 }, { "epoch": 0.6772968868640851, "grad_norm": 0.15013697743415833, "learning_rate": 1.8158848552818592e-05, "loss": 0.5535, "step": 2676 }, { "epoch": 0.677549987344976, "grad_norm": 0.14551712572574615, "learning_rate": 1.815746388627147e-05, "loss": 0.5388, "step": 2677 }, { "epoch": 0.6778030878258668, "grad_norm": 0.1438574492931366, "learning_rate": 1.8156078752074246e-05, "loss": 0.5222, "step": 2678 }, { "epoch": 0.6780561883067577, "grad_norm": 0.14907999336719513, "learning_rate": 1.8154693150306327e-05, "loss": 0.5325, "step": 2679 }, { "epoch": 0.6783092887876487, "grad_norm": 0.1412954479455948, "learning_rate": 1.815330708104715e-05, "loss": 0.5149, "step": 2680 }, { "epoch": 0.6785623892685396, "grad_norm": 0.14831040799617767, "learning_rate": 1.815192054437617e-05, "loss": 0.5388, "step": 2681 }, { "epoch": 0.6788154897494305, "grad_norm": 0.14994782209396362, "learning_rate": 1.815053354037288e-05, "loss": 0.5435, "step": 2682 }, { "epoch": 0.6790685902303214, "grad_norm": 0.142563596367836, "learning_rate": 1.814914606911679e-05, "loss": 0.5269, "step": 2683 }, { "epoch": 0.6793216907112124, "grad_norm": 0.14263266324996948, "learning_rate": 1.814775813068744e-05, "loss": 0.5313, "step": 2684 }, { "epoch": 0.6795747911921033, "grad_norm": 0.14363744854927063, "learning_rate": 1.81463697251644e-05, "loss": 0.5108, "step": 2685 }, { "epoch": 0.6798278916729942, "grad_norm": 0.187117338180542, "learning_rate": 1.8144980852627266e-05, "loss": 0.5346, "step": 2686 }, { "epoch": 0.6800809921538851, "grad_norm": 0.1482447385787964, "learning_rate": 1.8143591513155662e-05, "loss": 0.5538, "step": 2687 }, { "epoch": 0.680334092634776, "grad_norm": 0.1461954563856125, "learning_rate": 1.8142201706829225e-05, "loss": 0.5215, "step": 2688 }, { "epoch": 0.680587193115667, "grad_norm": 0.1450091302394867, "learning_rate": 1.8140811433727633e-05, "loss": 0.5306, "step": 2689 }, { "epoch": 0.6808402935965578, "grad_norm": 0.14692509174346924, "learning_rate": 1.81394206939306e-05, "loss": 0.5418, "step": 2690 }, { "epoch": 0.6810933940774487, "grad_norm": 0.14713634550571442, "learning_rate": 1.8138029487517833e-05, "loss": 0.5421, "step": 2691 }, { "epoch": 0.6813464945583396, "grad_norm": 0.140730082988739, "learning_rate": 1.8136637814569108e-05, "loss": 0.5322, "step": 2692 }, { "epoch": 0.6815995950392306, "grad_norm": 0.14739148318767548, "learning_rate": 1.813524567516419e-05, "loss": 0.5394, "step": 2693 }, { "epoch": 0.6818526955201215, "grad_norm": 0.14859172701835632, "learning_rate": 1.81338530693829e-05, "loss": 0.5419, "step": 2694 }, { "epoch": 0.6821057960010124, "grad_norm": 0.14800290763378143, "learning_rate": 1.8132459997305063e-05, "loss": 0.5281, "step": 2695 }, { "epoch": 0.6823588964819033, "grad_norm": 0.2411351054906845, "learning_rate": 1.8131066459010546e-05, "loss": 0.5375, "step": 2696 }, { "epoch": 0.6826119969627942, "grad_norm": 0.14017412066459656, "learning_rate": 1.812967245457924e-05, "loss": 0.5304, "step": 2697 }, { "epoch": 0.6828650974436852, "grad_norm": 0.1507704257965088, "learning_rate": 1.812827798409105e-05, "loss": 0.5114, "step": 2698 }, { "epoch": 0.6831181979245761, "grad_norm": 0.14874079823493958, "learning_rate": 1.8126883047625933e-05, "loss": 0.5418, "step": 2699 }, { "epoch": 0.683371298405467, "grad_norm": 0.14708906412124634, "learning_rate": 1.8125487645263847e-05, "loss": 0.5366, "step": 2700 }, { "epoch": 0.6836243988863578, "grad_norm": 0.1492803692817688, "learning_rate": 1.812409177708479e-05, "loss": 0.5398, "step": 2701 }, { "epoch": 0.6838774993672488, "grad_norm": 0.14203189313411713, "learning_rate": 1.8122695443168785e-05, "loss": 0.5186, "step": 2702 }, { "epoch": 0.6841305998481397, "grad_norm": 0.15312273800373077, "learning_rate": 1.812129864359588e-05, "loss": 0.5266, "step": 2703 }, { "epoch": 0.6843837003290306, "grad_norm": 0.14636339247226715, "learning_rate": 1.811990137844615e-05, "loss": 0.5243, "step": 2704 }, { "epoch": 0.6846368008099215, "grad_norm": 0.14452330768108368, "learning_rate": 1.81185036477997e-05, "loss": 0.5438, "step": 2705 }, { "epoch": 0.6848899012908124, "grad_norm": 0.1509128361940384, "learning_rate": 1.8117105451736657e-05, "loss": 0.5311, "step": 2706 }, { "epoch": 0.6851430017717034, "grad_norm": 0.15748311579227448, "learning_rate": 1.8115706790337176e-05, "loss": 0.5302, "step": 2707 }, { "epoch": 0.6853961022525943, "grad_norm": 0.14035460352897644, "learning_rate": 1.8114307663681444e-05, "loss": 0.5152, "step": 2708 }, { "epoch": 0.6856492027334852, "grad_norm": 0.14641892910003662, "learning_rate": 1.811290807184966e-05, "loss": 0.5333, "step": 2709 }, { "epoch": 0.6859023032143761, "grad_norm": 0.14782601594924927, "learning_rate": 1.811150801492207e-05, "loss": 0.545, "step": 2710 }, { "epoch": 0.6861554036952671, "grad_norm": 0.16503480076789856, "learning_rate": 1.811010749297893e-05, "loss": 0.5038, "step": 2711 }, { "epoch": 0.686408504176158, "grad_norm": 0.16098101437091827, "learning_rate": 1.810870650610053e-05, "loss": 0.5523, "step": 2712 }, { "epoch": 0.6866616046570488, "grad_norm": 0.14806942641735077, "learning_rate": 1.8107305054367188e-05, "loss": 0.5503, "step": 2713 }, { "epoch": 0.6869147051379397, "grad_norm": 0.14722320437431335, "learning_rate": 1.8105903137859248e-05, "loss": 0.5556, "step": 2714 }, { "epoch": 0.6871678056188307, "grad_norm": 0.15449842810630798, "learning_rate": 1.8104500756657072e-05, "loss": 0.5254, "step": 2715 }, { "epoch": 0.6874209060997216, "grad_norm": 0.14408960938453674, "learning_rate": 1.8103097910841055e-05, "loss": 0.528, "step": 2716 }, { "epoch": 0.6876740065806125, "grad_norm": 0.14892727136611938, "learning_rate": 1.810169460049163e-05, "loss": 0.5322, "step": 2717 }, { "epoch": 0.6879271070615034, "grad_norm": 0.14627705514431, "learning_rate": 1.8100290825689238e-05, "loss": 0.536, "step": 2718 }, { "epoch": 0.6881802075423943, "grad_norm": 0.1423405110836029, "learning_rate": 1.8098886586514356e-05, "loss": 0.5169, "step": 2719 }, { "epoch": 0.6884333080232853, "grad_norm": 0.14613592624664307, "learning_rate": 1.8097481883047483e-05, "loss": 0.5193, "step": 2720 }, { "epoch": 0.6886864085041762, "grad_norm": 0.1515716165304184, "learning_rate": 1.8096076715369152e-05, "loss": 0.5355, "step": 2721 }, { "epoch": 0.6889395089850671, "grad_norm": 0.14382004737854004, "learning_rate": 1.8094671083559918e-05, "loss": 0.5517, "step": 2722 }, { "epoch": 0.689192609465958, "grad_norm": 0.1456882804632187, "learning_rate": 1.809326498770036e-05, "loss": 0.539, "step": 2723 }, { "epoch": 0.689445709946849, "grad_norm": 0.1479150950908661, "learning_rate": 1.809185842787109e-05, "loss": 0.5223, "step": 2724 }, { "epoch": 0.6896988104277398, "grad_norm": 0.14168787002563477, "learning_rate": 1.8090451404152736e-05, "loss": 0.5394, "step": 2725 }, { "epoch": 0.6899519109086307, "grad_norm": 0.149136021733284, "learning_rate": 1.808904391662597e-05, "loss": 0.5013, "step": 2726 }, { "epoch": 0.6902050113895216, "grad_norm": 0.1428527981042862, "learning_rate": 1.808763596537147e-05, "loss": 0.5037, "step": 2727 }, { "epoch": 0.6904581118704125, "grad_norm": 0.1451261341571808, "learning_rate": 1.8086227550469965e-05, "loss": 0.5326, "step": 2728 }, { "epoch": 0.6907112123513035, "grad_norm": 0.14612074196338654, "learning_rate": 1.808481867200218e-05, "loss": 0.5391, "step": 2729 }, { "epoch": 0.6909643128321944, "grad_norm": 0.147441565990448, "learning_rate": 1.808340933004889e-05, "loss": 0.5487, "step": 2730 }, { "epoch": 0.6912174133130853, "grad_norm": 0.14560317993164062, "learning_rate": 1.808199952469089e-05, "loss": 0.5093, "step": 2731 }, { "epoch": 0.6914705137939762, "grad_norm": 0.14556057751178741, "learning_rate": 1.8080589256009007e-05, "loss": 0.5582, "step": 2732 }, { "epoch": 0.6917236142748672, "grad_norm": 0.1445273905992508, "learning_rate": 1.8079178524084077e-05, "loss": 0.5189, "step": 2733 }, { "epoch": 0.6919767147557581, "grad_norm": 0.14358773827552795, "learning_rate": 1.8077767328996983e-05, "loss": 0.515, "step": 2734 }, { "epoch": 0.692229815236649, "grad_norm": 0.1437392234802246, "learning_rate": 1.8076355670828626e-05, "loss": 0.5014, "step": 2735 }, { "epoch": 0.6924829157175398, "grad_norm": 0.1445649117231369, "learning_rate": 1.8074943549659923e-05, "loss": 0.5834, "step": 2736 }, { "epoch": 0.6927360161984307, "grad_norm": 0.14494314789772034, "learning_rate": 1.8073530965571838e-05, "loss": 0.5269, "step": 2737 }, { "epoch": 0.6929891166793217, "grad_norm": 0.14743253588676453, "learning_rate": 1.807211791864535e-05, "loss": 0.5393, "step": 2738 }, { "epoch": 0.6932422171602126, "grad_norm": 0.14686620235443115, "learning_rate": 1.8070704408961463e-05, "loss": 0.5481, "step": 2739 }, { "epoch": 0.6934953176411035, "grad_norm": 0.1491858810186386, "learning_rate": 1.806929043660121e-05, "loss": 0.5529, "step": 2740 }, { "epoch": 0.6937484181219944, "grad_norm": 0.1462259441614151, "learning_rate": 1.806787600164566e-05, "loss": 0.5477, "step": 2741 }, { "epoch": 0.6940015186028854, "grad_norm": 0.14706359803676605, "learning_rate": 1.8066461104175888e-05, "loss": 0.5374, "step": 2742 }, { "epoch": 0.6942546190837763, "grad_norm": 0.14483709633350372, "learning_rate": 1.8065045744273015e-05, "loss": 0.5161, "step": 2743 }, { "epoch": 0.6945077195646672, "grad_norm": 0.15205207467079163, "learning_rate": 1.8063629922018172e-05, "loss": 0.5238, "step": 2744 }, { "epoch": 0.6947608200455581, "grad_norm": 0.15554554760456085, "learning_rate": 1.8062213637492538e-05, "loss": 0.52, "step": 2745 }, { "epoch": 0.695013920526449, "grad_norm": 0.15813574194908142, "learning_rate": 1.8060796890777294e-05, "loss": 0.5371, "step": 2746 }, { "epoch": 0.69526702100734, "grad_norm": 0.14827005565166473, "learning_rate": 1.805937968195366e-05, "loss": 0.5507, "step": 2747 }, { "epoch": 0.6955201214882308, "grad_norm": 0.14521510899066925, "learning_rate": 1.805796201110289e-05, "loss": 0.5171, "step": 2748 }, { "epoch": 0.6957732219691217, "grad_norm": 0.1462366133928299, "learning_rate": 1.8056543878306247e-05, "loss": 0.5548, "step": 2749 }, { "epoch": 0.6960263224500126, "grad_norm": 0.14651119709014893, "learning_rate": 1.8055125283645036e-05, "loss": 0.5377, "step": 2750 }, { "epoch": 0.6962794229309036, "grad_norm": 0.14567963778972626, "learning_rate": 1.805370622720058e-05, "loss": 0.5358, "step": 2751 }, { "epoch": 0.6965325234117945, "grad_norm": 0.15595458447933197, "learning_rate": 1.8052286709054226e-05, "loss": 0.5452, "step": 2752 }, { "epoch": 0.6967856238926854, "grad_norm": 0.1589277982711792, "learning_rate": 1.805086672928736e-05, "loss": 0.5441, "step": 2753 }, { "epoch": 0.6970387243735763, "grad_norm": 0.14181219041347504, "learning_rate": 1.804944628798138e-05, "loss": 0.5248, "step": 2754 }, { "epoch": 0.6972918248544672, "grad_norm": 0.14502781629562378, "learning_rate": 1.804802538521772e-05, "loss": 0.5059, "step": 2755 }, { "epoch": 0.6975449253353582, "grad_norm": 0.14499282836914062, "learning_rate": 1.8046604021077834e-05, "loss": 0.5058, "step": 2756 }, { "epoch": 0.6977980258162491, "grad_norm": 0.14618387818336487, "learning_rate": 1.804518219564321e-05, "loss": 0.5181, "step": 2757 }, { "epoch": 0.69805112629714, "grad_norm": 0.14578969776630402, "learning_rate": 1.8043759908995355e-05, "loss": 0.539, "step": 2758 }, { "epoch": 0.6983042267780308, "grad_norm": 0.14962175488471985, "learning_rate": 1.8042337161215808e-05, "loss": 0.5291, "step": 2759 }, { "epoch": 0.6985573272589218, "grad_norm": 0.16270391643047333, "learning_rate": 1.8040913952386134e-05, "loss": 0.5516, "step": 2760 }, { "epoch": 0.6988104277398127, "grad_norm": 0.15037229657173157, "learning_rate": 1.8039490282587916e-05, "loss": 0.5411, "step": 2761 }, { "epoch": 0.6990635282207036, "grad_norm": 0.14972001314163208, "learning_rate": 1.803806615190278e-05, "loss": 0.5267, "step": 2762 }, { "epoch": 0.6993166287015945, "grad_norm": 0.14504480361938477, "learning_rate": 1.8036641560412355e-05, "loss": 0.5524, "step": 2763 }, { "epoch": 0.6995697291824855, "grad_norm": 0.14563485980033875, "learning_rate": 1.8035216508198318e-05, "loss": 0.5161, "step": 2764 }, { "epoch": 0.6998228296633764, "grad_norm": 0.14676491916179657, "learning_rate": 1.8033790995342368e-05, "loss": 0.5358, "step": 2765 }, { "epoch": 0.7000759301442673, "grad_norm": 0.1475553661584854, "learning_rate": 1.8032365021926218e-05, "loss": 0.5367, "step": 2766 }, { "epoch": 0.7003290306251582, "grad_norm": 0.1452532410621643, "learning_rate": 1.803093858803162e-05, "loss": 0.5359, "step": 2767 }, { "epoch": 0.700582131106049, "grad_norm": 0.16440361738204956, "learning_rate": 1.802951169374035e-05, "loss": 0.5153, "step": 2768 }, { "epoch": 0.70083523158694, "grad_norm": 0.14603191614151, "learning_rate": 1.8028084339134205e-05, "loss": 0.5208, "step": 2769 }, { "epoch": 0.7010883320678309, "grad_norm": 0.14808522164821625, "learning_rate": 1.8026656524295018e-05, "loss": 0.5457, "step": 2770 }, { "epoch": 0.7013414325487218, "grad_norm": 0.1442599594593048, "learning_rate": 1.8025228249304637e-05, "loss": 0.5235, "step": 2771 }, { "epoch": 0.7015945330296127, "grad_norm": 0.14662116765975952, "learning_rate": 1.8023799514244943e-05, "loss": 0.5088, "step": 2772 }, { "epoch": 0.7018476335105037, "grad_norm": 0.142899751663208, "learning_rate": 1.8022370319197847e-05, "loss": 0.538, "step": 2773 }, { "epoch": 0.7021007339913946, "grad_norm": 0.14384151995182037, "learning_rate": 1.8020940664245272e-05, "loss": 0.5427, "step": 2774 }, { "epoch": 0.7023538344722855, "grad_norm": 0.1488441526889801, "learning_rate": 1.8019510549469185e-05, "loss": 0.5414, "step": 2775 }, { "epoch": 0.7026069349531764, "grad_norm": 0.14776454865932465, "learning_rate": 1.8018079974951574e-05, "loss": 0.5214, "step": 2776 }, { "epoch": 0.7028600354340673, "grad_norm": 0.14912188053131104, "learning_rate": 1.801664894077444e-05, "loss": 0.5411, "step": 2777 }, { "epoch": 0.7031131359149583, "grad_norm": 0.1462947577238083, "learning_rate": 1.8015217447019832e-05, "loss": 0.5682, "step": 2778 }, { "epoch": 0.7033662363958492, "grad_norm": 0.1409514993429184, "learning_rate": 1.8013785493769806e-05, "loss": 0.5121, "step": 2779 }, { "epoch": 0.70361933687674, "grad_norm": 0.1406039148569107, "learning_rate": 1.8012353081106463e-05, "loss": 0.5262, "step": 2780 }, { "epoch": 0.7038724373576309, "grad_norm": 0.14898115396499634, "learning_rate": 1.8010920209111908e-05, "loss": 0.537, "step": 2781 }, { "epoch": 0.7041255378385219, "grad_norm": 0.14424747228622437, "learning_rate": 1.8009486877868293e-05, "loss": 0.5258, "step": 2782 }, { "epoch": 0.7043786383194128, "grad_norm": 0.15015709400177002, "learning_rate": 1.8008053087457784e-05, "loss": 0.4968, "step": 2783 }, { "epoch": 0.7046317388003037, "grad_norm": 0.14862953126430511, "learning_rate": 1.800661883796258e-05, "loss": 0.508, "step": 2784 }, { "epoch": 0.7048848392811946, "grad_norm": 0.14401929080486298, "learning_rate": 1.8005184129464905e-05, "loss": 0.5225, "step": 2785 }, { "epoch": 0.7051379397620855, "grad_norm": 0.14750038087368011, "learning_rate": 1.8003748962046997e-05, "loss": 0.5555, "step": 2786 }, { "epoch": 0.7053910402429765, "grad_norm": 0.14002522826194763, "learning_rate": 1.8002313335791143e-05, "loss": 0.5049, "step": 2787 }, { "epoch": 0.7056441407238674, "grad_norm": 0.16281020641326904, "learning_rate": 1.800087725077964e-05, "loss": 0.5209, "step": 2788 }, { "epoch": 0.7058972412047583, "grad_norm": 0.14687448740005493, "learning_rate": 1.799944070709482e-05, "loss": 0.5479, "step": 2789 }, { "epoch": 0.7061503416856492, "grad_norm": 0.1478552669286728, "learning_rate": 1.799800370481903e-05, "loss": 0.5372, "step": 2790 }, { "epoch": 0.7064034421665402, "grad_norm": 0.15007026493549347, "learning_rate": 1.799656624403465e-05, "loss": 0.5264, "step": 2791 }, { "epoch": 0.706656542647431, "grad_norm": 0.14968924224376678, "learning_rate": 1.7995128324824094e-05, "loss": 0.5485, "step": 2792 }, { "epoch": 0.7069096431283219, "grad_norm": 0.1551859825849533, "learning_rate": 1.799368994726979e-05, "loss": 0.5162, "step": 2793 }, { "epoch": 0.7071627436092128, "grad_norm": 0.14584921300411224, "learning_rate": 1.7992251111454198e-05, "loss": 0.5153, "step": 2794 }, { "epoch": 0.7074158440901037, "grad_norm": 0.15128177404403687, "learning_rate": 1.7990811817459802e-05, "loss": 0.5425, "step": 2795 }, { "epoch": 0.7076689445709947, "grad_norm": 0.14871525764465332, "learning_rate": 1.7989372065369118e-05, "loss": 0.534, "step": 2796 }, { "epoch": 0.7079220450518856, "grad_norm": 0.14760757982730865, "learning_rate": 1.7987931855264677e-05, "loss": 0.5279, "step": 2797 }, { "epoch": 0.7081751455327765, "grad_norm": 0.14915476739406586, "learning_rate": 1.798649118722905e-05, "loss": 0.5133, "step": 2798 }, { "epoch": 0.7084282460136674, "grad_norm": 0.14028678834438324, "learning_rate": 1.798505006134482e-05, "loss": 0.5203, "step": 2799 }, { "epoch": 0.7086813464945584, "grad_norm": 0.14444823563098907, "learning_rate": 1.7983608477694616e-05, "loss": 0.5349, "step": 2800 }, { "epoch": 0.7089344469754493, "grad_norm": 0.1436929553747177, "learning_rate": 1.7982166436361067e-05, "loss": 0.5176, "step": 2801 }, { "epoch": 0.7091875474563402, "grad_norm": 0.14400990307331085, "learning_rate": 1.7980723937426848e-05, "loss": 0.5246, "step": 2802 }, { "epoch": 0.709440647937231, "grad_norm": 0.14669135212898254, "learning_rate": 1.7979280980974658e-05, "loss": 0.5344, "step": 2803 }, { "epoch": 0.7096937484181219, "grad_norm": 0.15003477036952972, "learning_rate": 1.7977837567087214e-05, "loss": 0.5285, "step": 2804 }, { "epoch": 0.7099468488990129, "grad_norm": 0.15274128317832947, "learning_rate": 1.7976393695847267e-05, "loss": 0.5622, "step": 2805 }, { "epoch": 0.7101999493799038, "grad_norm": 0.14622192084789276, "learning_rate": 1.7974949367337586e-05, "loss": 0.5131, "step": 2806 }, { "epoch": 0.7104530498607947, "grad_norm": 0.14860200881958008, "learning_rate": 1.7973504581640978e-05, "loss": 0.5315, "step": 2807 }, { "epoch": 0.7107061503416856, "grad_norm": 0.1511184573173523, "learning_rate": 1.7972059338840262e-05, "loss": 0.57, "step": 2808 }, { "epoch": 0.7109592508225766, "grad_norm": 0.14598067104816437, "learning_rate": 1.7970613639018297e-05, "loss": 0.5285, "step": 2809 }, { "epoch": 0.7112123513034675, "grad_norm": 0.15503932535648346, "learning_rate": 1.796916748225796e-05, "loss": 0.5564, "step": 2810 }, { "epoch": 0.7114654517843584, "grad_norm": 0.14427991211414337, "learning_rate": 1.7967720868642156e-05, "loss": 0.5314, "step": 2811 }, { "epoch": 0.7117185522652493, "grad_norm": 0.14314691722393036, "learning_rate": 1.7966273798253815e-05, "loss": 0.5052, "step": 2812 }, { "epoch": 0.7119716527461403, "grad_norm": 0.15298913419246674, "learning_rate": 1.7964826271175896e-05, "loss": 0.5472, "step": 2813 }, { "epoch": 0.7122247532270312, "grad_norm": 0.15115098655223846, "learning_rate": 1.7963378287491383e-05, "loss": 0.5443, "step": 2814 }, { "epoch": 0.712477853707922, "grad_norm": 0.14653074741363525, "learning_rate": 1.7961929847283284e-05, "loss": 0.5363, "step": 2815 }, { "epoch": 0.7127309541888129, "grad_norm": 0.15043623745441437, "learning_rate": 1.7960480950634635e-05, "loss": 0.5715, "step": 2816 }, { "epoch": 0.7129840546697038, "grad_norm": 0.14523740112781525, "learning_rate": 1.7959031597628504e-05, "loss": 0.5246, "step": 2817 }, { "epoch": 0.7132371551505948, "grad_norm": 0.14821510016918182, "learning_rate": 1.7957581788347967e-05, "loss": 0.5442, "step": 2818 }, { "epoch": 0.7134902556314857, "grad_norm": 0.14689341187477112, "learning_rate": 1.795613152287615e-05, "loss": 0.5327, "step": 2819 }, { "epoch": 0.7137433561123766, "grad_norm": 0.1495596319437027, "learning_rate": 1.795468080129619e-05, "loss": 0.52, "step": 2820 }, { "epoch": 0.7139964565932675, "grad_norm": 0.14510971307754517, "learning_rate": 1.7953229623691256e-05, "loss": 0.526, "step": 2821 }, { "epoch": 0.7142495570741585, "grad_norm": 0.14755523204803467, "learning_rate": 1.7951777990144534e-05, "loss": 0.5426, "step": 2822 }, { "epoch": 0.7145026575550494, "grad_norm": 0.1472373902797699, "learning_rate": 1.795032590073925e-05, "loss": 0.5396, "step": 2823 }, { "epoch": 0.7147557580359403, "grad_norm": 0.15088455379009247, "learning_rate": 1.7948873355558647e-05, "loss": 0.5292, "step": 2824 }, { "epoch": 0.7150088585168312, "grad_norm": 0.18523003160953522, "learning_rate": 1.7947420354685995e-05, "loss": 0.543, "step": 2825 }, { "epoch": 0.715261958997722, "grad_norm": 0.14839759469032288, "learning_rate": 1.794596689820459e-05, "loss": 0.5121, "step": 2826 }, { "epoch": 0.715515059478613, "grad_norm": 0.14841121435165405, "learning_rate": 1.794451298619776e-05, "loss": 0.5192, "step": 2827 }, { "epoch": 0.7157681599595039, "grad_norm": 0.1472892314195633, "learning_rate": 1.7943058618748853e-05, "loss": 0.5244, "step": 2828 }, { "epoch": 0.7160212604403948, "grad_norm": 0.1522616297006607, "learning_rate": 1.7941603795941247e-05, "loss": 0.54, "step": 2829 }, { "epoch": 0.7162743609212857, "grad_norm": 0.14400440454483032, "learning_rate": 1.794014851785834e-05, "loss": 0.5232, "step": 2830 }, { "epoch": 0.7165274614021767, "grad_norm": 0.15369334816932678, "learning_rate": 1.793869278458356e-05, "loss": 0.5649, "step": 2831 }, { "epoch": 0.7167805618830676, "grad_norm": 0.15156877040863037, "learning_rate": 1.7937236596200362e-05, "loss": 0.5444, "step": 2832 }, { "epoch": 0.7170336623639585, "grad_norm": 0.1472565233707428, "learning_rate": 1.793577995279223e-05, "loss": 0.5596, "step": 2833 }, { "epoch": 0.7172867628448494, "grad_norm": 0.15399563312530518, "learning_rate": 1.793432285444266e-05, "loss": 0.5209, "step": 2834 }, { "epoch": 0.7175398633257403, "grad_norm": 0.1464615911245346, "learning_rate": 1.79328653012352e-05, "loss": 0.5228, "step": 2835 }, { "epoch": 0.7177929638066313, "grad_norm": 0.1457682102918625, "learning_rate": 1.7931407293253396e-05, "loss": 0.5353, "step": 2836 }, { "epoch": 0.7180460642875222, "grad_norm": 0.14437851309776306, "learning_rate": 1.7929948830580837e-05, "loss": 0.5311, "step": 2837 }, { "epoch": 0.718299164768413, "grad_norm": 0.14645318686962128, "learning_rate": 1.7928489913301132e-05, "loss": 0.534, "step": 2838 }, { "epoch": 0.7185522652493039, "grad_norm": 0.14460812509059906, "learning_rate": 1.792703054149792e-05, "loss": 0.5089, "step": 2839 }, { "epoch": 0.7188053657301949, "grad_norm": 0.14507655799388885, "learning_rate": 1.792557071525486e-05, "loss": 0.5444, "step": 2840 }, { "epoch": 0.7190584662110858, "grad_norm": 0.149433434009552, "learning_rate": 1.7924110434655645e-05, "loss": 0.5198, "step": 2841 }, { "epoch": 0.7193115666919767, "grad_norm": 0.14685337245464325, "learning_rate": 1.7922649699783985e-05, "loss": 0.5351, "step": 2842 }, { "epoch": 0.7195646671728676, "grad_norm": 0.1567326933145523, "learning_rate": 1.792118851072363e-05, "loss": 0.5206, "step": 2843 }, { "epoch": 0.7198177676537585, "grad_norm": 0.14403514564037323, "learning_rate": 1.7919726867558333e-05, "loss": 0.5313, "step": 2844 }, { "epoch": 0.7200708681346495, "grad_norm": 0.14330951869487762, "learning_rate": 1.7918264770371897e-05, "loss": 0.5412, "step": 2845 }, { "epoch": 0.7203239686155404, "grad_norm": 0.18638013303279877, "learning_rate": 1.7916802219248136e-05, "loss": 0.5498, "step": 2846 }, { "epoch": 0.7205770690964313, "grad_norm": 0.15019692480564117, "learning_rate": 1.79153392142709e-05, "loss": 0.5252, "step": 2847 }, { "epoch": 0.7208301695773222, "grad_norm": 0.14442569017410278, "learning_rate": 1.7913875755524058e-05, "loss": 0.5343, "step": 2848 }, { "epoch": 0.7210832700582132, "grad_norm": 0.15182821452617645, "learning_rate": 1.7912411843091505e-05, "loss": 0.5427, "step": 2849 }, { "epoch": 0.721336370539104, "grad_norm": 0.1439221352338791, "learning_rate": 1.791094747705717e-05, "loss": 0.5196, "step": 2850 }, { "epoch": 0.7215894710199949, "grad_norm": 0.1714148223400116, "learning_rate": 1.7909482657504988e-05, "loss": 0.536, "step": 2851 }, { "epoch": 0.7218425715008858, "grad_norm": 0.14558939635753632, "learning_rate": 1.7908017384518946e-05, "loss": 0.5459, "step": 2852 }, { "epoch": 0.7220956719817767, "grad_norm": 0.1415179967880249, "learning_rate": 1.7906551658183047e-05, "loss": 0.5152, "step": 2853 }, { "epoch": 0.7223487724626677, "grad_norm": 0.16388234496116638, "learning_rate": 1.790508547858131e-05, "loss": 0.5399, "step": 2854 }, { "epoch": 0.7226018729435586, "grad_norm": 0.14092190563678741, "learning_rate": 1.7903618845797792e-05, "loss": 0.5031, "step": 2855 }, { "epoch": 0.7228549734244495, "grad_norm": 0.1417672336101532, "learning_rate": 1.790215175991657e-05, "loss": 0.5043, "step": 2856 }, { "epoch": 0.7231080739053404, "grad_norm": 0.14391210675239563, "learning_rate": 1.7900684221021747e-05, "loss": 0.5341, "step": 2857 }, { "epoch": 0.7233611743862314, "grad_norm": 0.1473853588104248, "learning_rate": 1.7899216229197463e-05, "loss": 0.5526, "step": 2858 }, { "epoch": 0.7236142748671223, "grad_norm": 0.14629141986370087, "learning_rate": 1.7897747784527864e-05, "loss": 0.5292, "step": 2859 }, { "epoch": 0.7238673753480132, "grad_norm": 0.14511753618717194, "learning_rate": 1.789627888709714e-05, "loss": 0.5424, "step": 2860 }, { "epoch": 0.724120475828904, "grad_norm": 0.1441185474395752, "learning_rate": 1.7894809536989498e-05, "loss": 0.5352, "step": 2861 }, { "epoch": 0.724373576309795, "grad_norm": 0.1520334780216217, "learning_rate": 1.789333973428917e-05, "loss": 0.5463, "step": 2862 }, { "epoch": 0.7246266767906859, "grad_norm": 0.1481693983078003, "learning_rate": 1.7891869479080418e-05, "loss": 0.5418, "step": 2863 }, { "epoch": 0.7248797772715768, "grad_norm": 0.15643611550331116, "learning_rate": 1.7890398771447534e-05, "loss": 0.5349, "step": 2864 }, { "epoch": 0.7251328777524677, "grad_norm": 0.1441963016986847, "learning_rate": 1.788892761147482e-05, "loss": 0.5285, "step": 2865 }, { "epoch": 0.7253859782333586, "grad_norm": 0.15031108260154724, "learning_rate": 1.7887455999246623e-05, "loss": 0.552, "step": 2866 }, { "epoch": 0.7256390787142496, "grad_norm": 0.1494501680135727, "learning_rate": 1.7885983934847307e-05, "loss": 0.5487, "step": 2867 }, { "epoch": 0.7258921791951405, "grad_norm": 0.14510177075862885, "learning_rate": 1.7884511418361256e-05, "loss": 0.5262, "step": 2868 }, { "epoch": 0.7261452796760314, "grad_norm": 0.15942813456058502, "learning_rate": 1.7883038449872892e-05, "loss": 0.5171, "step": 2869 }, { "epoch": 0.7263983801569223, "grad_norm": 0.1431449055671692, "learning_rate": 1.7881565029466656e-05, "loss": 0.5199, "step": 2870 }, { "epoch": 0.7266514806378133, "grad_norm": 0.14851166307926178, "learning_rate": 1.7880091157227015e-05, "loss": 0.5258, "step": 2871 }, { "epoch": 0.7269045811187042, "grad_norm": 0.1499960571527481, "learning_rate": 1.7878616833238466e-05, "loss": 0.5357, "step": 2872 }, { "epoch": 0.727157681599595, "grad_norm": 0.14485423266887665, "learning_rate": 1.7877142057585525e-05, "loss": 0.5071, "step": 2873 }, { "epoch": 0.7274107820804859, "grad_norm": 0.15008753538131714, "learning_rate": 1.7875666830352737e-05, "loss": 0.5372, "step": 2874 }, { "epoch": 0.7276638825613768, "grad_norm": 0.14161038398742676, "learning_rate": 1.787419115162468e-05, "loss": 0.5088, "step": 2875 }, { "epoch": 0.7279169830422678, "grad_norm": 0.14768251776695251, "learning_rate": 1.787271502148594e-05, "loss": 0.517, "step": 2876 }, { "epoch": 0.7281700835231587, "grad_norm": 0.15208204090595245, "learning_rate": 1.7871238440021154e-05, "loss": 0.5045, "step": 2877 }, { "epoch": 0.7284231840040496, "grad_norm": 0.14633716642856598, "learning_rate": 1.7869761407314966e-05, "loss": 0.5477, "step": 2878 }, { "epoch": 0.7286762844849405, "grad_norm": 0.14961759746074677, "learning_rate": 1.7868283923452046e-05, "loss": 0.5197, "step": 2879 }, { "epoch": 0.7289293849658315, "grad_norm": 0.1462971568107605, "learning_rate": 1.7866805988517102e-05, "loss": 0.5121, "step": 2880 }, { "epoch": 0.7291824854467224, "grad_norm": 0.14941370487213135, "learning_rate": 1.7865327602594855e-05, "loss": 0.5557, "step": 2881 }, { "epoch": 0.7294355859276133, "grad_norm": 0.1482287496328354, "learning_rate": 1.786384876577006e-05, "loss": 0.5476, "step": 2882 }, { "epoch": 0.7296886864085042, "grad_norm": 0.149621844291687, "learning_rate": 1.7862369478127502e-05, "loss": 0.5243, "step": 2883 }, { "epoch": 0.729941786889395, "grad_norm": 0.14481297135353088, "learning_rate": 1.786088973975198e-05, "loss": 0.5, "step": 2884 }, { "epoch": 0.730194887370286, "grad_norm": 0.14421936869621277, "learning_rate": 1.785940955072832e-05, "loss": 0.5492, "step": 2885 }, { "epoch": 0.7304479878511769, "grad_norm": 0.15094096958637238, "learning_rate": 1.7857928911141382e-05, "loss": 0.505, "step": 2886 }, { "epoch": 0.7307010883320678, "grad_norm": 0.14592532813549042, "learning_rate": 1.7856447821076052e-05, "loss": 0.5383, "step": 2887 }, { "epoch": 0.7309541888129587, "grad_norm": 0.1469985991716385, "learning_rate": 1.785496628061723e-05, "loss": 0.5283, "step": 2888 }, { "epoch": 0.7312072892938497, "grad_norm": 0.14522351324558258, "learning_rate": 1.7853484289849854e-05, "loss": 0.4775, "step": 2889 }, { "epoch": 0.7314603897747406, "grad_norm": 0.14416377246379852, "learning_rate": 1.7852001848858883e-05, "loss": 0.5045, "step": 2890 }, { "epoch": 0.7317134902556315, "grad_norm": 0.14876104891300201, "learning_rate": 1.7850518957729305e-05, "loss": 0.5208, "step": 2891 }, { "epoch": 0.7319665907365224, "grad_norm": 0.14898617565631866, "learning_rate": 1.784903561654612e-05, "loss": 0.5489, "step": 2892 }, { "epoch": 0.7322196912174133, "grad_norm": 0.1452137529850006, "learning_rate": 1.784755182539438e-05, "loss": 0.5454, "step": 2893 }, { "epoch": 0.7324727916983043, "grad_norm": 0.15900442004203796, "learning_rate": 1.7846067584359138e-05, "loss": 0.5291, "step": 2894 }, { "epoch": 0.7327258921791951, "grad_norm": 0.14678388833999634, "learning_rate": 1.7844582893525487e-05, "loss": 0.5292, "step": 2895 }, { "epoch": 0.732978992660086, "grad_norm": 0.1841592639684677, "learning_rate": 1.784309775297854e-05, "loss": 0.5152, "step": 2896 }, { "epoch": 0.7332320931409769, "grad_norm": 0.1804284304380417, "learning_rate": 1.7841612162803434e-05, "loss": 0.5267, "step": 2897 }, { "epoch": 0.7334851936218679, "grad_norm": 0.14499682188034058, "learning_rate": 1.7840126123085332e-05, "loss": 0.5372, "step": 2898 }, { "epoch": 0.7337382941027588, "grad_norm": 0.1472013294696808, "learning_rate": 1.7838639633909435e-05, "loss": 0.5349, "step": 2899 }, { "epoch": 0.7339913945836497, "grad_norm": 0.2258056253194809, "learning_rate": 1.783715269536096e-05, "loss": 0.5387, "step": 2900 }, { "epoch": 0.7342444950645406, "grad_norm": 0.14326857030391693, "learning_rate": 1.7835665307525137e-05, "loss": 0.5373, "step": 2901 }, { "epoch": 0.7344975955454315, "grad_norm": 0.14536747336387634, "learning_rate": 1.7834177470487242e-05, "loss": 0.5171, "step": 2902 }, { "epoch": 0.7347506960263225, "grad_norm": 0.15388627350330353, "learning_rate": 1.783268918433258e-05, "loss": 0.5477, "step": 2903 }, { "epoch": 0.7350037965072134, "grad_norm": 0.15929150581359863, "learning_rate": 1.7831200449146457e-05, "loss": 0.5249, "step": 2904 }, { "epoch": 0.7352568969881043, "grad_norm": 0.15158522129058838, "learning_rate": 1.7829711265014224e-05, "loss": 0.5385, "step": 2905 }, { "epoch": 0.7355099974689951, "grad_norm": 0.14774282276630402, "learning_rate": 1.782822163202125e-05, "loss": 0.5179, "step": 2906 }, { "epoch": 0.7357630979498861, "grad_norm": 0.14512324333190918, "learning_rate": 1.7826731550252943e-05, "loss": 0.5443, "step": 2907 }, { "epoch": 0.736016198430777, "grad_norm": 0.6089526414871216, "learning_rate": 1.7825241019794715e-05, "loss": 0.5556, "step": 2908 }, { "epoch": 0.7362692989116679, "grad_norm": 0.14531570672988892, "learning_rate": 1.7823750040732016e-05, "loss": 0.5472, "step": 2909 }, { "epoch": 0.7365223993925588, "grad_norm": 0.14886292815208435, "learning_rate": 1.7822258613150327e-05, "loss": 0.5265, "step": 2910 }, { "epoch": 0.7367754998734498, "grad_norm": 0.15178433060646057, "learning_rate": 1.7820766737135143e-05, "loss": 0.5366, "step": 2911 }, { "epoch": 0.7370286003543407, "grad_norm": 0.15800794959068298, "learning_rate": 1.781927441277199e-05, "loss": 0.5543, "step": 2912 }, { "epoch": 0.7372817008352316, "grad_norm": 0.14842455089092255, "learning_rate": 1.7817781640146426e-05, "loss": 0.5194, "step": 2913 }, { "epoch": 0.7375348013161225, "grad_norm": 0.1446068435907364, "learning_rate": 1.7816288419344018e-05, "loss": 0.5142, "step": 2914 }, { "epoch": 0.7377879017970134, "grad_norm": 0.1476709246635437, "learning_rate": 1.781479475045038e-05, "loss": 0.5527, "step": 2915 }, { "epoch": 0.7380410022779044, "grad_norm": 0.15049278736114502, "learning_rate": 1.7813300633551135e-05, "loss": 0.5229, "step": 2916 }, { "epoch": 0.7382941027587953, "grad_norm": 0.14928176999092102, "learning_rate": 1.7811806068731937e-05, "loss": 0.5459, "step": 2917 }, { "epoch": 0.7385472032396861, "grad_norm": 0.1485675424337387, "learning_rate": 1.7810311056078468e-05, "loss": 0.5202, "step": 2918 }, { "epoch": 0.738800303720577, "grad_norm": 0.14436054229736328, "learning_rate": 1.7808815595676434e-05, "loss": 0.5389, "step": 2919 }, { "epoch": 0.739053404201468, "grad_norm": 0.16016635298728943, "learning_rate": 1.7807319687611565e-05, "loss": 0.5204, "step": 2920 }, { "epoch": 0.7393065046823589, "grad_norm": 0.1503237783908844, "learning_rate": 1.7805823331969625e-05, "loss": 0.5428, "step": 2921 }, { "epoch": 0.7395596051632498, "grad_norm": 0.14970757067203522, "learning_rate": 1.7804326528836387e-05, "loss": 0.5139, "step": 2922 }, { "epoch": 0.7398127056441407, "grad_norm": 0.15020276606082916, "learning_rate": 1.7802829278297663e-05, "loss": 0.5323, "step": 2923 }, { "epoch": 0.7400658061250316, "grad_norm": 0.15152312815189362, "learning_rate": 1.7801331580439288e-05, "loss": 0.5466, "step": 2924 }, { "epoch": 0.7403189066059226, "grad_norm": 0.14687307178974152, "learning_rate": 1.7799833435347127e-05, "loss": 0.5373, "step": 2925 }, { "epoch": 0.7405720070868135, "grad_norm": 0.14877833425998688, "learning_rate": 1.7798334843107056e-05, "loss": 0.5114, "step": 2926 }, { "epoch": 0.7408251075677044, "grad_norm": 0.14333681762218475, "learning_rate": 1.779683580380499e-05, "loss": 0.5213, "step": 2927 }, { "epoch": 0.7410782080485953, "grad_norm": 0.15205039083957672, "learning_rate": 1.779533631752687e-05, "loss": 0.5585, "step": 2928 }, { "epoch": 0.7413313085294863, "grad_norm": 0.14635588228702545, "learning_rate": 1.7793836384358653e-05, "loss": 0.5142, "step": 2929 }, { "epoch": 0.7415844090103771, "grad_norm": 0.14950081706047058, "learning_rate": 1.779233600438633e-05, "loss": 0.5303, "step": 2930 }, { "epoch": 0.741837509491268, "grad_norm": 0.14850197732448578, "learning_rate": 1.7790835177695913e-05, "loss": 0.5561, "step": 2931 }, { "epoch": 0.7420906099721589, "grad_norm": 0.1456206738948822, "learning_rate": 1.7789333904373442e-05, "loss": 0.5173, "step": 2932 }, { "epoch": 0.7423437104530498, "grad_norm": 0.1439744085073471, "learning_rate": 1.778783218450498e-05, "loss": 0.4964, "step": 2933 }, { "epoch": 0.7425968109339408, "grad_norm": 0.15093915164470673, "learning_rate": 1.7786330018176617e-05, "loss": 0.5184, "step": 2934 }, { "epoch": 0.7428499114148317, "grad_norm": 0.15115399658679962, "learning_rate": 1.7784827405474472e-05, "loss": 0.5649, "step": 2935 }, { "epoch": 0.7431030118957226, "grad_norm": 0.1543717235326767, "learning_rate": 1.7783324346484687e-05, "loss": 0.5345, "step": 2936 }, { "epoch": 0.7433561123766135, "grad_norm": 0.16179496049880981, "learning_rate": 1.7781820841293426e-05, "loss": 0.5542, "step": 2937 }, { "epoch": 0.7436092128575045, "grad_norm": 0.1514633148908615, "learning_rate": 1.7780316889986883e-05, "loss": 0.5284, "step": 2938 }, { "epoch": 0.7438623133383954, "grad_norm": 0.15350931882858276, "learning_rate": 1.7778812492651275e-05, "loss": 0.5345, "step": 2939 }, { "epoch": 0.7441154138192863, "grad_norm": 0.1504724770784378, "learning_rate": 1.777730764937285e-05, "loss": 0.5424, "step": 2940 }, { "epoch": 0.7443685143001771, "grad_norm": 0.14843443036079407, "learning_rate": 1.7775802360237877e-05, "loss": 0.5354, "step": 2941 }, { "epoch": 0.744621614781068, "grad_norm": 0.1463453620672226, "learning_rate": 1.7774296625332647e-05, "loss": 0.5058, "step": 2942 }, { "epoch": 0.744874715261959, "grad_norm": 0.15474829077720642, "learning_rate": 1.7772790444743485e-05, "loss": 0.5209, "step": 2943 }, { "epoch": 0.7451278157428499, "grad_norm": 0.14824678003787994, "learning_rate": 1.7771283818556727e-05, "loss": 0.5152, "step": 2944 }, { "epoch": 0.7453809162237408, "grad_norm": 0.15518391132354736, "learning_rate": 1.776977674685876e-05, "loss": 0.5217, "step": 2945 }, { "epoch": 0.7456340167046317, "grad_norm": 0.14659546315670013, "learning_rate": 1.776826922973597e-05, "loss": 0.5318, "step": 2946 }, { "epoch": 0.7458871171855227, "grad_norm": 0.1435319483280182, "learning_rate": 1.7766761267274786e-05, "loss": 0.5375, "step": 2947 }, { "epoch": 0.7461402176664136, "grad_norm": 0.14836201071739197, "learning_rate": 1.7765252859561655e-05, "loss": 0.5419, "step": 2948 }, { "epoch": 0.7463933181473045, "grad_norm": 0.15242090821266174, "learning_rate": 1.7763744006683047e-05, "loss": 0.5492, "step": 2949 }, { "epoch": 0.7466464186281954, "grad_norm": 0.14794063568115234, "learning_rate": 1.7762234708725464e-05, "loss": 0.509, "step": 2950 }, { "epoch": 0.7468995191090863, "grad_norm": 0.14903810620307922, "learning_rate": 1.776072496577543e-05, "loss": 0.5268, "step": 2951 }, { "epoch": 0.7471526195899773, "grad_norm": 0.14988981187343597, "learning_rate": 1.7759214777919496e-05, "loss": 0.5397, "step": 2952 }, { "epoch": 0.7474057200708681, "grad_norm": 0.23774631321430206, "learning_rate": 1.7757704145244238e-05, "loss": 0.5274, "step": 2953 }, { "epoch": 0.747658820551759, "grad_norm": 0.16623346507549286, "learning_rate": 1.7756193067836262e-05, "loss": 0.5404, "step": 2954 }, { "epoch": 0.7479119210326499, "grad_norm": 0.1483059525489807, "learning_rate": 1.7754681545782186e-05, "loss": 0.533, "step": 2955 }, { "epoch": 0.7481650215135409, "grad_norm": 0.15368886291980743, "learning_rate": 1.7753169579168664e-05, "loss": 0.532, "step": 2956 }, { "epoch": 0.7484181219944318, "grad_norm": 0.14759455621242523, "learning_rate": 1.7751657168082383e-05, "loss": 0.5253, "step": 2957 }, { "epoch": 0.7486712224753227, "grad_norm": 0.1529926359653473, "learning_rate": 1.7750144312610033e-05, "loss": 0.5471, "step": 2958 }, { "epoch": 0.7489243229562136, "grad_norm": 0.14726297557353973, "learning_rate": 1.7748631012838353e-05, "loss": 0.5447, "step": 2959 }, { "epoch": 0.7491774234371046, "grad_norm": 0.14679226279258728, "learning_rate": 1.7747117268854093e-05, "loss": 0.5019, "step": 2960 }, { "epoch": 0.7494305239179955, "grad_norm": 0.14927375316619873, "learning_rate": 1.7745603080744032e-05, "loss": 0.5083, "step": 2961 }, { "epoch": 0.7496836243988864, "grad_norm": 0.14736905694007874, "learning_rate": 1.774408844859498e-05, "loss": 0.5464, "step": 2962 }, { "epoch": 0.7499367248797773, "grad_norm": 0.16927950084209442, "learning_rate": 1.7742573372493765e-05, "loss": 0.522, "step": 2963 }, { "epoch": 0.7501898253606681, "grad_norm": 0.14778970181941986, "learning_rate": 1.7741057852527238e-05, "loss": 0.5143, "step": 2964 }, { "epoch": 0.7504429258415591, "grad_norm": 0.1510339081287384, "learning_rate": 1.7739541888782287e-05, "loss": 0.5104, "step": 2965 }, { "epoch": 0.75069602632245, "grad_norm": 0.14848393201828003, "learning_rate": 1.773802548134582e-05, "loss": 0.5317, "step": 2966 }, { "epoch": 0.7509491268033409, "grad_norm": 0.14839640259742737, "learning_rate": 1.7736508630304762e-05, "loss": 0.5395, "step": 2967 }, { "epoch": 0.7512022272842318, "grad_norm": 0.14730995893478394, "learning_rate": 1.7734991335746077e-05, "loss": 0.5198, "step": 2968 }, { "epoch": 0.7514553277651228, "grad_norm": 0.15580318868160248, "learning_rate": 1.7733473597756746e-05, "loss": 0.5475, "step": 2969 }, { "epoch": 0.7517084282460137, "grad_norm": 0.2519559860229492, "learning_rate": 1.7731955416423778e-05, "loss": 0.5294, "step": 2970 }, { "epoch": 0.7519615287269046, "grad_norm": 0.14028480648994446, "learning_rate": 1.773043679183421e-05, "loss": 0.5416, "step": 2971 }, { "epoch": 0.7522146292077955, "grad_norm": 0.14994113147258759, "learning_rate": 1.7728917724075096e-05, "loss": 0.5408, "step": 2972 }, { "epoch": 0.7524677296886864, "grad_norm": 0.14393679797649384, "learning_rate": 1.7727398213233525e-05, "loss": 0.5421, "step": 2973 }, { "epoch": 0.7527208301695774, "grad_norm": 0.14897584915161133, "learning_rate": 1.7725878259396605e-05, "loss": 0.5341, "step": 2974 }, { "epoch": 0.7529739306504682, "grad_norm": 0.16702575981616974, "learning_rate": 1.7724357862651474e-05, "loss": 0.5156, "step": 2975 }, { "epoch": 0.7532270311313591, "grad_norm": 0.15600398182868958, "learning_rate": 1.772283702308529e-05, "loss": 0.5292, "step": 2976 }, { "epoch": 0.75348013161225, "grad_norm": 0.15482543408870697, "learning_rate": 1.7721315740785244e-05, "loss": 0.5635, "step": 2977 }, { "epoch": 0.753733232093141, "grad_norm": 0.14389260113239288, "learning_rate": 1.7719794015838547e-05, "loss": 0.5263, "step": 2978 }, { "epoch": 0.7539863325740319, "grad_norm": 0.14956559240818024, "learning_rate": 1.771827184833243e-05, "loss": 0.5106, "step": 2979 }, { "epoch": 0.7542394330549228, "grad_norm": 0.1460314691066742, "learning_rate": 1.771674923835416e-05, "loss": 0.5413, "step": 2980 }, { "epoch": 0.7544925335358137, "grad_norm": 0.15577438473701477, "learning_rate": 1.7715226185991028e-05, "loss": 0.537, "step": 2981 }, { "epoch": 0.7547456340167046, "grad_norm": 0.17240791022777557, "learning_rate": 1.7713702691330346e-05, "loss": 0.5057, "step": 2982 }, { "epoch": 0.7549987344975956, "grad_norm": 0.14926257729530334, "learning_rate": 1.771217875445945e-05, "loss": 0.5397, "step": 2983 }, { "epoch": 0.7552518349784865, "grad_norm": 0.1437961757183075, "learning_rate": 1.7710654375465705e-05, "loss": 0.5347, "step": 2984 }, { "epoch": 0.7555049354593774, "grad_norm": 0.14255636930465698, "learning_rate": 1.77091295544365e-05, "loss": 0.5273, "step": 2985 }, { "epoch": 0.7557580359402682, "grad_norm": 0.1450057029724121, "learning_rate": 1.770760429145925e-05, "loss": 0.5289, "step": 2986 }, { "epoch": 0.7560111364211592, "grad_norm": 0.14657136797904968, "learning_rate": 1.77060785866214e-05, "loss": 0.5278, "step": 2987 }, { "epoch": 0.7562642369020501, "grad_norm": 0.1571706384420395, "learning_rate": 1.7704552440010406e-05, "loss": 0.5494, "step": 2988 }, { "epoch": 0.756517337382941, "grad_norm": 0.1852877289056778, "learning_rate": 1.7703025851713768e-05, "loss": 0.5202, "step": 2989 }, { "epoch": 0.7567704378638319, "grad_norm": 0.15663079917430878, "learning_rate": 1.7701498821818993e-05, "loss": 0.525, "step": 2990 }, { "epoch": 0.7570235383447228, "grad_norm": 0.1503055989742279, "learning_rate": 1.769997135041363e-05, "loss": 0.5151, "step": 2991 }, { "epoch": 0.7572766388256138, "grad_norm": 0.15526209771633148, "learning_rate": 1.7698443437585244e-05, "loss": 0.5367, "step": 2992 }, { "epoch": 0.7575297393065047, "grad_norm": 0.1541331559419632, "learning_rate": 1.7696915083421427e-05, "loss": 0.5565, "step": 2993 }, { "epoch": 0.7577828397873956, "grad_norm": 0.15073953568935394, "learning_rate": 1.769538628800979e-05, "loss": 0.5038, "step": 2994 }, { "epoch": 0.7580359402682865, "grad_norm": 0.16339759528636932, "learning_rate": 1.7693857051437986e-05, "loss": 0.5062, "step": 2995 }, { "epoch": 0.7582890407491775, "grad_norm": 0.15231944620609283, "learning_rate": 1.7692327373793675e-05, "loss": 0.5424, "step": 2996 }, { "epoch": 0.7585421412300684, "grad_norm": 0.1665373295545578, "learning_rate": 1.7690797255164557e-05, "loss": 0.505, "step": 2997 }, { "epoch": 0.7587952417109592, "grad_norm": 0.1562441736459732, "learning_rate": 1.7689266695638345e-05, "loss": 0.5234, "step": 2998 }, { "epoch": 0.7590483421918501, "grad_norm": 0.1558438539505005, "learning_rate": 1.768773569530278e-05, "loss": 0.5427, "step": 2999 }, { "epoch": 0.7593014426727411, "grad_norm": 0.1525963395833969, "learning_rate": 1.7686204254245638e-05, "loss": 0.5334, "step": 3000 }, { "epoch": 0.759554543153632, "grad_norm": 0.15848256647586823, "learning_rate": 1.768467237255471e-05, "loss": 0.5222, "step": 3001 }, { "epoch": 0.7598076436345229, "grad_norm": 0.14888495206832886, "learning_rate": 1.7683140050317816e-05, "loss": 0.5297, "step": 3002 }, { "epoch": 0.7600607441154138, "grad_norm": 0.15064217150211334, "learning_rate": 1.76816072876228e-05, "loss": 0.5171, "step": 3003 }, { "epoch": 0.7603138445963047, "grad_norm": 0.15602967143058777, "learning_rate": 1.7680074084557533e-05, "loss": 0.5343, "step": 3004 }, { "epoch": 0.7605669450771957, "grad_norm": 0.150221049785614, "learning_rate": 1.7678540441209914e-05, "loss": 0.5375, "step": 3005 }, { "epoch": 0.7608200455580866, "grad_norm": 0.15444840490818024, "learning_rate": 1.7677006357667855e-05, "loss": 0.5289, "step": 3006 }, { "epoch": 0.7610731460389775, "grad_norm": 0.15188942849636078, "learning_rate": 1.7675471834019307e-05, "loss": 0.5396, "step": 3007 }, { "epoch": 0.7613262465198684, "grad_norm": 0.15647144615650177, "learning_rate": 1.767393687035224e-05, "loss": 0.5393, "step": 3008 }, { "epoch": 0.7615793470007594, "grad_norm": 0.15710274875164032, "learning_rate": 1.7672401466754653e-05, "loss": 0.551, "step": 3009 }, { "epoch": 0.7618324474816502, "grad_norm": 0.14301566779613495, "learning_rate": 1.7670865623314563e-05, "loss": 0.5356, "step": 3010 }, { "epoch": 0.7620855479625411, "grad_norm": 0.15355347096920013, "learning_rate": 1.766932934012002e-05, "loss": 0.5102, "step": 3011 }, { "epoch": 0.762338648443432, "grad_norm": 0.14850030839443207, "learning_rate": 1.766779261725909e-05, "loss": 0.5363, "step": 3012 }, { "epoch": 0.7625917489243229, "grad_norm": 0.14782600104808807, "learning_rate": 1.766625545481988e-05, "loss": 0.5195, "step": 3013 }, { "epoch": 0.7628448494052139, "grad_norm": 0.15266305208206177, "learning_rate": 1.7664717852890506e-05, "loss": 0.5204, "step": 3014 }, { "epoch": 0.7630979498861048, "grad_norm": 0.15124641358852386, "learning_rate": 1.7663179811559112e-05, "loss": 0.5319, "step": 3015 }, { "epoch": 0.7633510503669957, "grad_norm": 0.15111200511455536, "learning_rate": 1.7661641330913878e-05, "loss": 0.5351, "step": 3016 }, { "epoch": 0.7636041508478866, "grad_norm": 0.15270401537418365, "learning_rate": 1.7660102411042998e-05, "loss": 0.5143, "step": 3017 }, { "epoch": 0.7638572513287776, "grad_norm": 0.15359775722026825, "learning_rate": 1.7658563052034697e-05, "loss": 0.5495, "step": 3018 }, { "epoch": 0.7641103518096685, "grad_norm": 0.14975665509700775, "learning_rate": 1.7657023253977222e-05, "loss": 0.5194, "step": 3019 }, { "epoch": 0.7643634522905594, "grad_norm": 0.1495814472436905, "learning_rate": 1.7655483016958844e-05, "loss": 0.5133, "step": 3020 }, { "epoch": 0.7646165527714502, "grad_norm": 0.1468273401260376, "learning_rate": 1.7653942341067867e-05, "loss": 0.5532, "step": 3021 }, { "epoch": 0.7648696532523411, "grad_norm": 0.2196628600358963, "learning_rate": 1.7652401226392608e-05, "loss": 0.5279, "step": 3022 }, { "epoch": 0.7651227537332321, "grad_norm": 0.1500619500875473, "learning_rate": 1.765085967302142e-05, "loss": 0.5471, "step": 3023 }, { "epoch": 0.765375854214123, "grad_norm": 0.14568065106868744, "learning_rate": 1.7649317681042676e-05, "loss": 0.5312, "step": 3024 }, { "epoch": 0.7656289546950139, "grad_norm": 0.15396185219287872, "learning_rate": 1.764777525054478e-05, "loss": 0.5609, "step": 3025 }, { "epoch": 0.7658820551759048, "grad_norm": 0.16178679466247559, "learning_rate": 1.7646232381616147e-05, "loss": 0.5326, "step": 3026 }, { "epoch": 0.7661351556567958, "grad_norm": 0.14604564011096954, "learning_rate": 1.764468907434523e-05, "loss": 0.5096, "step": 3027 }, { "epoch": 0.7663882561376867, "grad_norm": 0.15339668095111847, "learning_rate": 1.7643145328820508e-05, "loss": 0.5429, "step": 3028 }, { "epoch": 0.7666413566185776, "grad_norm": 0.177531898021698, "learning_rate": 1.7641601145130476e-05, "loss": 0.5295, "step": 3029 }, { "epoch": 0.7668944570994685, "grad_norm": 0.14776462316513062, "learning_rate": 1.764005652336366e-05, "loss": 0.522, "step": 3030 }, { "epoch": 0.7671475575803594, "grad_norm": 0.15326423943042755, "learning_rate": 1.763851146360861e-05, "loss": 0.5607, "step": 3031 }, { "epoch": 0.7674006580612504, "grad_norm": 0.1485346108675003, "learning_rate": 1.76369659659539e-05, "loss": 0.5401, "step": 3032 }, { "epoch": 0.7676537585421412, "grad_norm": 0.15064039826393127, "learning_rate": 1.7635420030488136e-05, "loss": 0.5439, "step": 3033 }, { "epoch": 0.7679068590230321, "grad_norm": 0.16441243886947632, "learning_rate": 1.7633873657299932e-05, "loss": 0.5329, "step": 3034 }, { "epoch": 0.768159959503923, "grad_norm": 0.1476014405488968, "learning_rate": 1.7632326846477946e-05, "loss": 0.5312, "step": 3035 }, { "epoch": 0.768413059984814, "grad_norm": 0.15498928725719452, "learning_rate": 1.7630779598110855e-05, "loss": 0.5561, "step": 3036 }, { "epoch": 0.7686661604657049, "grad_norm": 0.14413794875144958, "learning_rate": 1.7629231912287355e-05, "loss": 0.5048, "step": 3037 }, { "epoch": 0.7689192609465958, "grad_norm": 0.14934541285037994, "learning_rate": 1.762768378909617e-05, "loss": 0.5343, "step": 3038 }, { "epoch": 0.7691723614274867, "grad_norm": 0.1492408812046051, "learning_rate": 1.7626135228626057e-05, "loss": 0.5277, "step": 3039 }, { "epoch": 0.7694254619083776, "grad_norm": 0.14385581016540527, "learning_rate": 1.7624586230965785e-05, "loss": 0.5309, "step": 3040 }, { "epoch": 0.7696785623892686, "grad_norm": 0.16025836765766144, "learning_rate": 1.762303679620416e-05, "loss": 0.4861, "step": 3041 }, { "epoch": 0.7699316628701595, "grad_norm": 0.15009579062461853, "learning_rate": 1.7621486924430006e-05, "loss": 0.5447, "step": 3042 }, { "epoch": 0.7701847633510503, "grad_norm": 0.17060710489749908, "learning_rate": 1.7619936615732172e-05, "loss": 0.5049, "step": 3043 }, { "epoch": 0.7704378638319412, "grad_norm": 0.151773139834404, "learning_rate": 1.761838587019954e-05, "loss": 0.541, "step": 3044 }, { "epoch": 0.7706909643128322, "grad_norm": 0.14400655031204224, "learning_rate": 1.7616834687921e-05, "loss": 0.5236, "step": 3045 }, { "epoch": 0.7709440647937231, "grad_norm": 0.14931342005729675, "learning_rate": 1.7615283068985488e-05, "loss": 0.5249, "step": 3046 }, { "epoch": 0.771197165274614, "grad_norm": 0.1493956744670868, "learning_rate": 1.761373101348195e-05, "loss": 0.5402, "step": 3047 }, { "epoch": 0.7714502657555049, "grad_norm": 0.14991344511508942, "learning_rate": 1.7612178521499368e-05, "loss": 0.5284, "step": 3048 }, { "epoch": 0.7717033662363959, "grad_norm": 0.17889820039272308, "learning_rate": 1.7610625593126736e-05, "loss": 0.5188, "step": 3049 }, { "epoch": 0.7719564667172868, "grad_norm": 0.14607857167720795, "learning_rate": 1.760907222845308e-05, "loss": 0.5412, "step": 3050 }, { "epoch": 0.7722095671981777, "grad_norm": 0.1670723855495453, "learning_rate": 1.760751842756746e-05, "loss": 0.5264, "step": 3051 }, { "epoch": 0.7724626676790686, "grad_norm": 0.15447920560836792, "learning_rate": 1.760596419055894e-05, "loss": 0.5309, "step": 3052 }, { "epoch": 0.7727157681599595, "grad_norm": 0.14942969381809235, "learning_rate": 1.760440951751663e-05, "loss": 0.5255, "step": 3053 }, { "epoch": 0.7729688686408505, "grad_norm": 0.14934472739696503, "learning_rate": 1.760285440852965e-05, "loss": 0.5333, "step": 3054 }, { "epoch": 0.7732219691217413, "grad_norm": 0.14957554638385773, "learning_rate": 1.7601298863687158e-05, "loss": 0.5171, "step": 3055 }, { "epoch": 0.7734750696026322, "grad_norm": 0.16215239465236664, "learning_rate": 1.7599742883078324e-05, "loss": 0.5404, "step": 3056 }, { "epoch": 0.7737281700835231, "grad_norm": 0.14555588364601135, "learning_rate": 1.7598186466792355e-05, "loss": 0.5077, "step": 3057 }, { "epoch": 0.7739812705644141, "grad_norm": 0.15099303424358368, "learning_rate": 1.759662961491847e-05, "loss": 0.5154, "step": 3058 }, { "epoch": 0.774234371045305, "grad_norm": 0.16148875653743744, "learning_rate": 1.7595072327545923e-05, "loss": 0.5071, "step": 3059 }, { "epoch": 0.7744874715261959, "grad_norm": 0.15326759219169617, "learning_rate": 1.759351460476399e-05, "loss": 0.5523, "step": 3060 }, { "epoch": 0.7747405720070868, "grad_norm": 0.13800647854804993, "learning_rate": 1.7591956446661974e-05, "loss": 0.5073, "step": 3061 }, { "epoch": 0.7749936724879777, "grad_norm": 0.15216606855392456, "learning_rate": 1.7590397853329203e-05, "loss": 0.54, "step": 3062 }, { "epoch": 0.7752467729688687, "grad_norm": 0.14922036230564117, "learning_rate": 1.758883882485502e-05, "loss": 0.5434, "step": 3063 }, { "epoch": 0.7754998734497596, "grad_norm": 0.14790558815002441, "learning_rate": 1.7587279361328805e-05, "loss": 0.5239, "step": 3064 }, { "epoch": 0.7757529739306505, "grad_norm": 0.14883247017860413, "learning_rate": 1.758571946283996e-05, "loss": 0.5324, "step": 3065 }, { "epoch": 0.7760060744115413, "grad_norm": 0.16245825588703156, "learning_rate": 1.7584159129477908e-05, "loss": 0.5258, "step": 3066 }, { "epoch": 0.7762591748924323, "grad_norm": 0.14453540742397308, "learning_rate": 1.7582598361332103e-05, "loss": 0.5489, "step": 3067 }, { "epoch": 0.7765122753733232, "grad_norm": 0.1497040092945099, "learning_rate": 1.7581037158492015e-05, "loss": 0.5344, "step": 3068 }, { "epoch": 0.7767653758542141, "grad_norm": 0.15549802780151367, "learning_rate": 1.7579475521047152e-05, "loss": 0.5162, "step": 3069 }, { "epoch": 0.777018476335105, "grad_norm": 0.1497444063425064, "learning_rate": 1.7577913449087028e-05, "loss": 0.524, "step": 3070 }, { "epoch": 0.7772715768159959, "grad_norm": 0.1515781432390213, "learning_rate": 1.757635094270121e-05, "loss": 0.5101, "step": 3071 }, { "epoch": 0.7775246772968869, "grad_norm": 0.15219537913799286, "learning_rate": 1.7574788001979255e-05, "loss": 0.5372, "step": 3072 }, { "epoch": 0.7777777777777778, "grad_norm": 0.1431799679994583, "learning_rate": 1.7573224627010778e-05, "loss": 0.5365, "step": 3073 }, { "epoch": 0.7780308782586687, "grad_norm": 0.14396248757839203, "learning_rate": 1.75716608178854e-05, "loss": 0.5347, "step": 3074 }, { "epoch": 0.7782839787395596, "grad_norm": 0.1475006639957428, "learning_rate": 1.757009657469276e-05, "loss": 0.5199, "step": 3075 }, { "epoch": 0.7785370792204506, "grad_norm": 0.14710633456707, "learning_rate": 1.756853189752255e-05, "loss": 0.5318, "step": 3076 }, { "epoch": 0.7787901797013415, "grad_norm": 0.14377863705158234, "learning_rate": 1.7566966786464457e-05, "loss": 0.5217, "step": 3077 }, { "epoch": 0.7790432801822323, "grad_norm": 0.14841139316558838, "learning_rate": 1.7565401241608205e-05, "loss": 0.5151, "step": 3078 }, { "epoch": 0.7792963806631232, "grad_norm": 0.14787758886814117, "learning_rate": 1.7563835263043556e-05, "loss": 0.5102, "step": 3079 }, { "epoch": 0.7795494811440141, "grad_norm": 0.15038910508155823, "learning_rate": 1.756226885086027e-05, "loss": 0.5221, "step": 3080 }, { "epoch": 0.7798025816249051, "grad_norm": 0.14832082390785217, "learning_rate": 1.7560702005148156e-05, "loss": 0.5488, "step": 3081 }, { "epoch": 0.780055682105796, "grad_norm": 0.17327068746089935, "learning_rate": 1.7559134725997034e-05, "loss": 0.541, "step": 3082 }, { "epoch": 0.7803087825866869, "grad_norm": 0.17171186208724976, "learning_rate": 1.755756701349675e-05, "loss": 0.5284, "step": 3083 }, { "epoch": 0.7805618830675778, "grad_norm": 0.14251437783241272, "learning_rate": 1.7555998867737184e-05, "loss": 0.5228, "step": 3084 }, { "epoch": 0.7808149835484688, "grad_norm": 0.14049778878688812, "learning_rate": 1.7554430288808228e-05, "loss": 0.5065, "step": 3085 }, { "epoch": 0.7810680840293597, "grad_norm": 0.1463402658700943, "learning_rate": 1.7552861276799812e-05, "loss": 0.5221, "step": 3086 }, { "epoch": 0.7813211845102506, "grad_norm": 0.15021516382694244, "learning_rate": 1.7551291831801876e-05, "loss": 0.5149, "step": 3087 }, { "epoch": 0.7815742849911415, "grad_norm": 0.14419084787368774, "learning_rate": 1.75497219539044e-05, "loss": 0.5424, "step": 3088 }, { "epoch": 0.7818273854720323, "grad_norm": 0.1514299064874649, "learning_rate": 1.754815164319738e-05, "loss": 0.5362, "step": 3089 }, { "epoch": 0.7820804859529233, "grad_norm": 0.18080902099609375, "learning_rate": 1.754658089977084e-05, "loss": 0.5379, "step": 3090 }, { "epoch": 0.7823335864338142, "grad_norm": 0.1468861997127533, "learning_rate": 1.754500972371482e-05, "loss": 0.5347, "step": 3091 }, { "epoch": 0.7825866869147051, "grad_norm": 0.1527579426765442, "learning_rate": 1.7543438115119397e-05, "loss": 0.5425, "step": 3092 }, { "epoch": 0.782839787395596, "grad_norm": 0.14295974373817444, "learning_rate": 1.754186607407467e-05, "loss": 0.54, "step": 3093 }, { "epoch": 0.783092887876487, "grad_norm": 0.1470569223165512, "learning_rate": 1.7540293600670758e-05, "loss": 0.5512, "step": 3094 }, { "epoch": 0.7833459883573779, "grad_norm": 0.1645229160785675, "learning_rate": 1.7538720694997814e-05, "loss": 0.5642, "step": 3095 }, { "epoch": 0.7835990888382688, "grad_norm": 0.41865524649620056, "learning_rate": 1.7537147357145998e-05, "loss": 0.5418, "step": 3096 }, { "epoch": 0.7838521893191597, "grad_norm": 0.1472993940114975, "learning_rate": 1.7535573587205514e-05, "loss": 0.5286, "step": 3097 }, { "epoch": 0.7841052898000507, "grad_norm": 0.14632809162139893, "learning_rate": 1.7533999385266582e-05, "loss": 0.5716, "step": 3098 }, { "epoch": 0.7843583902809416, "grad_norm": 0.14350329339504242, "learning_rate": 1.7532424751419445e-05, "loss": 0.5391, "step": 3099 }, { "epoch": 0.7846114907618325, "grad_norm": 0.16144879162311554, "learning_rate": 1.7530849685754377e-05, "loss": 0.5133, "step": 3100 }, { "epoch": 0.7848645912427233, "grad_norm": 0.14275042712688446, "learning_rate": 1.7529274188361673e-05, "loss": 0.5426, "step": 3101 }, { "epoch": 0.7851176917236142, "grad_norm": 0.15260683000087738, "learning_rate": 1.7527698259331645e-05, "loss": 0.5563, "step": 3102 }, { "epoch": 0.7853707922045052, "grad_norm": 0.16672103106975555, "learning_rate": 1.7526121898754648e-05, "loss": 0.5346, "step": 3103 }, { "epoch": 0.7856238926853961, "grad_norm": 0.14481595158576965, "learning_rate": 1.752454510672105e-05, "loss": 0.5507, "step": 3104 }, { "epoch": 0.785876993166287, "grad_norm": 0.1431320309638977, "learning_rate": 1.7522967883321236e-05, "loss": 0.5097, "step": 3105 }, { "epoch": 0.7861300936471779, "grad_norm": 0.14806000888347626, "learning_rate": 1.7521390228645635e-05, "loss": 0.5266, "step": 3106 }, { "epoch": 0.7863831941280689, "grad_norm": 0.1451040655374527, "learning_rate": 1.7519812142784687e-05, "loss": 0.5436, "step": 3107 }, { "epoch": 0.7866362946089598, "grad_norm": 0.14261282980442047, "learning_rate": 1.751823362582886e-05, "loss": 0.5431, "step": 3108 }, { "epoch": 0.7868893950898507, "grad_norm": 0.1590554565191269, "learning_rate": 1.7516654677868645e-05, "loss": 0.538, "step": 3109 }, { "epoch": 0.7871424955707416, "grad_norm": 0.16258887946605682, "learning_rate": 1.7515075298994566e-05, "loss": 0.5309, "step": 3110 }, { "epoch": 0.7873955960516325, "grad_norm": 0.14656253159046173, "learning_rate": 1.7513495489297158e-05, "loss": 0.506, "step": 3111 }, { "epoch": 0.7876486965325235, "grad_norm": 0.14780651032924652, "learning_rate": 1.7511915248866993e-05, "loss": 0.5291, "step": 3112 }, { "epoch": 0.7879017970134143, "grad_norm": 0.2545357644557953, "learning_rate": 1.7510334577794662e-05, "loss": 0.5201, "step": 3113 }, { "epoch": 0.7881548974943052, "grad_norm": 0.1433885544538498, "learning_rate": 1.7508753476170778e-05, "loss": 0.521, "step": 3114 }, { "epoch": 0.7884079979751961, "grad_norm": 0.1426301747560501, "learning_rate": 1.750717194408599e-05, "loss": 0.5202, "step": 3115 }, { "epoch": 0.7886610984560871, "grad_norm": 0.14188823103904724, "learning_rate": 1.7505589981630955e-05, "loss": 0.5006, "step": 3116 }, { "epoch": 0.788914198936978, "grad_norm": 0.15551066398620605, "learning_rate": 1.7504007588896366e-05, "loss": 0.5568, "step": 3117 }, { "epoch": 0.7891672994178689, "grad_norm": 0.1495133489370346, "learning_rate": 1.7502424765972944e-05, "loss": 0.5346, "step": 3118 }, { "epoch": 0.7894203998987598, "grad_norm": 0.14981497824192047, "learning_rate": 1.7500841512951422e-05, "loss": 0.5241, "step": 3119 }, { "epoch": 0.7896735003796507, "grad_norm": 0.14472204446792603, "learning_rate": 1.7499257829922573e-05, "loss": 0.5329, "step": 3120 }, { "epoch": 0.7899266008605417, "grad_norm": 0.1536359339952469, "learning_rate": 1.7497673716977174e-05, "loss": 0.5196, "step": 3121 }, { "epoch": 0.7901797013414326, "grad_norm": 0.23148512840270996, "learning_rate": 1.7496089174206044e-05, "loss": 0.5343, "step": 3122 }, { "epoch": 0.7904328018223234, "grad_norm": 0.14624980092048645, "learning_rate": 1.7494504201700026e-05, "loss": 0.5281, "step": 3123 }, { "epoch": 0.7906859023032143, "grad_norm": 0.14661769568920135, "learning_rate": 1.7492918799549977e-05, "loss": 0.5172, "step": 3124 }, { "epoch": 0.7909390027841053, "grad_norm": 0.14322149753570557, "learning_rate": 1.7491332967846792e-05, "loss": 0.5349, "step": 3125 }, { "epoch": 0.7911921032649962, "grad_norm": 0.14741265773773193, "learning_rate": 1.7489746706681376e-05, "loss": 0.5275, "step": 3126 }, { "epoch": 0.7914452037458871, "grad_norm": 0.14919212460517883, "learning_rate": 1.7488160016144672e-05, "loss": 0.5266, "step": 3127 }, { "epoch": 0.791698304226778, "grad_norm": 0.14996758103370667, "learning_rate": 1.7486572896327635e-05, "loss": 0.541, "step": 3128 }, { "epoch": 0.7919514047076689, "grad_norm": 0.15300825238227844, "learning_rate": 1.7484985347321254e-05, "loss": 0.5179, "step": 3129 }, { "epoch": 0.7922045051885599, "grad_norm": 0.15273669362068176, "learning_rate": 1.7483397369216543e-05, "loss": 0.5135, "step": 3130 }, { "epoch": 0.7924576056694508, "grad_norm": 0.14794354140758514, "learning_rate": 1.7481808962104536e-05, "loss": 0.5397, "step": 3131 }, { "epoch": 0.7927107061503417, "grad_norm": 0.14445775747299194, "learning_rate": 1.7480220126076287e-05, "loss": 0.5334, "step": 3132 }, { "epoch": 0.7929638066312326, "grad_norm": 0.15021532773971558, "learning_rate": 1.747863086122289e-05, "loss": 0.5534, "step": 3133 }, { "epoch": 0.7932169071121236, "grad_norm": 0.14712731540203094, "learning_rate": 1.7477041167635448e-05, "loss": 0.5136, "step": 3134 }, { "epoch": 0.7934700075930144, "grad_norm": 0.14748620986938477, "learning_rate": 1.7475451045405098e-05, "loss": 0.5219, "step": 3135 }, { "epoch": 0.7937231080739053, "grad_norm": 0.14623963832855225, "learning_rate": 1.7473860494622995e-05, "loss": 0.5267, "step": 3136 }, { "epoch": 0.7939762085547962, "grad_norm": 0.14472338557243347, "learning_rate": 1.7472269515380325e-05, "loss": 0.5269, "step": 3137 }, { "epoch": 0.7942293090356871, "grad_norm": 0.14391745626926422, "learning_rate": 1.747067810776829e-05, "loss": 0.5367, "step": 3138 }, { "epoch": 0.7944824095165781, "grad_norm": 0.22898557782173157, "learning_rate": 1.7469086271878132e-05, "loss": 0.5164, "step": 3139 }, { "epoch": 0.794735509997469, "grad_norm": 0.1457047462463379, "learning_rate": 1.74674940078011e-05, "loss": 0.5, "step": 3140 }, { "epoch": 0.7949886104783599, "grad_norm": 0.1578938215970993, "learning_rate": 1.746590131562848e-05, "loss": 0.5397, "step": 3141 }, { "epoch": 0.7952417109592508, "grad_norm": 0.1459478735923767, "learning_rate": 1.746430819545157e-05, "loss": 0.5432, "step": 3142 }, { "epoch": 0.7954948114401418, "grad_norm": 0.1432630568742752, "learning_rate": 1.7462714647361704e-05, "loss": 0.5264, "step": 3143 }, { "epoch": 0.7957479119210327, "grad_norm": 0.17483381927013397, "learning_rate": 1.746112067145024e-05, "loss": 0.5239, "step": 3144 }, { "epoch": 0.7960010124019236, "grad_norm": 0.15125927329063416, "learning_rate": 1.7459526267808554e-05, "loss": 0.5368, "step": 3145 }, { "epoch": 0.7962541128828144, "grad_norm": 0.16469672322273254, "learning_rate": 1.7457931436528047e-05, "loss": 0.5258, "step": 3146 }, { "epoch": 0.7965072133637054, "grad_norm": 0.15689164400100708, "learning_rate": 1.7456336177700156e-05, "loss": 0.5478, "step": 3147 }, { "epoch": 0.7967603138445963, "grad_norm": 0.1473947912454605, "learning_rate": 1.745474049141633e-05, "loss": 0.5448, "step": 3148 }, { "epoch": 0.7970134143254872, "grad_norm": 0.144256129860878, "learning_rate": 1.745314437776804e-05, "loss": 0.5333, "step": 3149 }, { "epoch": 0.7972665148063781, "grad_norm": 0.14789961278438568, "learning_rate": 1.7451547836846792e-05, "loss": 0.5524, "step": 3150 }, { "epoch": 0.797519615287269, "grad_norm": 0.15695874392986298, "learning_rate": 1.744995086874412e-05, "loss": 0.5175, "step": 3151 }, { "epoch": 0.79777271576816, "grad_norm": 0.1487826704978943, "learning_rate": 1.7448353473551562e-05, "loss": 0.5258, "step": 3152 }, { "epoch": 0.7980258162490509, "grad_norm": 0.16587235033512115, "learning_rate": 1.74467556513607e-05, "loss": 0.5524, "step": 3153 }, { "epoch": 0.7982789167299418, "grad_norm": 0.14169293642044067, "learning_rate": 1.7445157402263136e-05, "loss": 0.4876, "step": 3154 }, { "epoch": 0.7985320172108327, "grad_norm": 0.14788098633289337, "learning_rate": 1.7443558726350487e-05, "loss": 0.5213, "step": 3155 }, { "epoch": 0.7987851176917237, "grad_norm": 0.1869196593761444, "learning_rate": 1.744195962371441e-05, "loss": 0.5223, "step": 3156 }, { "epoch": 0.7990382181726146, "grad_norm": 0.1450669914484024, "learning_rate": 1.744036009444657e-05, "loss": 0.5241, "step": 3157 }, { "epoch": 0.7992913186535054, "grad_norm": 0.14606769382953644, "learning_rate": 1.7438760138638667e-05, "loss": 0.5261, "step": 3158 }, { "epoch": 0.7995444191343963, "grad_norm": 0.15291200578212738, "learning_rate": 1.7437159756382432e-05, "loss": 0.5507, "step": 3159 }, { "epoch": 0.7997975196152872, "grad_norm": 0.14692464470863342, "learning_rate": 1.74355589477696e-05, "loss": 0.5357, "step": 3160 }, { "epoch": 0.8000506200961782, "grad_norm": 0.14842897653579712, "learning_rate": 1.7433957712891946e-05, "loss": 0.5375, "step": 3161 }, { "epoch": 0.8003037205770691, "grad_norm": 0.21580153703689575, "learning_rate": 1.7432356051841265e-05, "loss": 0.5178, "step": 3162 }, { "epoch": 0.80055682105796, "grad_norm": 0.14377190172672272, "learning_rate": 1.743075396470938e-05, "loss": 0.5037, "step": 3163 }, { "epoch": 0.8008099215388509, "grad_norm": 0.14572538435459137, "learning_rate": 1.742915145158813e-05, "loss": 0.5235, "step": 3164 }, { "epoch": 0.8010630220197419, "grad_norm": 0.14468161761760712, "learning_rate": 1.7427548512569384e-05, "loss": 0.571, "step": 3165 }, { "epoch": 0.8013161225006328, "grad_norm": 0.1520310938358307, "learning_rate": 1.742594514774504e-05, "loss": 0.5298, "step": 3166 }, { "epoch": 0.8015692229815237, "grad_norm": 0.1469186395406723, "learning_rate": 1.7424341357207015e-05, "loss": 0.5416, "step": 3167 }, { "epoch": 0.8018223234624146, "grad_norm": 0.15198953449726105, "learning_rate": 1.742273714104725e-05, "loss": 0.5843, "step": 3168 }, { "epoch": 0.8020754239433054, "grad_norm": 0.14566631615161896, "learning_rate": 1.7421132499357706e-05, "loss": 0.5275, "step": 3169 }, { "epoch": 0.8023285244241964, "grad_norm": 0.14933887124061584, "learning_rate": 1.7419527432230378e-05, "loss": 0.5412, "step": 3170 }, { "epoch": 0.8025816249050873, "grad_norm": 0.15247420966625214, "learning_rate": 1.7417921939757284e-05, "loss": 0.5539, "step": 3171 }, { "epoch": 0.8028347253859782, "grad_norm": 0.14642424881458282, "learning_rate": 1.7416316022030458e-05, "loss": 0.5373, "step": 3172 }, { "epoch": 0.8030878258668691, "grad_norm": 0.14506737887859344, "learning_rate": 1.7414709679141966e-05, "loss": 0.5391, "step": 3173 }, { "epoch": 0.8033409263477601, "grad_norm": 0.13656027615070343, "learning_rate": 1.7413102911183895e-05, "loss": 0.529, "step": 3174 }, { "epoch": 0.803594026828651, "grad_norm": 0.15322208404541016, "learning_rate": 1.7411495718248364e-05, "loss": 0.5286, "step": 3175 }, { "epoch": 0.8038471273095419, "grad_norm": 0.24947340786457062, "learning_rate": 1.74098881004275e-05, "loss": 0.4954, "step": 3176 }, { "epoch": 0.8041002277904328, "grad_norm": 0.14178350567817688, "learning_rate": 1.7408280057813474e-05, "loss": 0.5062, "step": 3177 }, { "epoch": 0.8043533282713237, "grad_norm": 0.148432195186615, "learning_rate": 1.7406671590498466e-05, "loss": 0.5114, "step": 3178 }, { "epoch": 0.8046064287522147, "grad_norm": 0.15027928352355957, "learning_rate": 1.7405062698574685e-05, "loss": 0.5337, "step": 3179 }, { "epoch": 0.8048595292331056, "grad_norm": 0.14550426602363586, "learning_rate": 1.740345338213437e-05, "loss": 0.5211, "step": 3180 }, { "epoch": 0.8051126297139964, "grad_norm": 0.14625242352485657, "learning_rate": 1.7401843641269773e-05, "loss": 0.5349, "step": 3181 }, { "epoch": 0.8053657301948873, "grad_norm": 0.14504191279411316, "learning_rate": 1.7400233476073184e-05, "loss": 0.5092, "step": 3182 }, { "epoch": 0.8056188306757783, "grad_norm": 0.14711694419384003, "learning_rate": 1.7398622886636908e-05, "loss": 0.5212, "step": 3183 }, { "epoch": 0.8058719311566692, "grad_norm": 0.1485500931739807, "learning_rate": 1.7397011873053274e-05, "loss": 0.5389, "step": 3184 }, { "epoch": 0.8061250316375601, "grad_norm": 0.15183328092098236, "learning_rate": 1.7395400435414643e-05, "loss": 0.5496, "step": 3185 }, { "epoch": 0.806378132118451, "grad_norm": 0.14625895023345947, "learning_rate": 1.739378857381339e-05, "loss": 0.5241, "step": 3186 }, { "epoch": 0.8066312325993419, "grad_norm": 0.15633103251457214, "learning_rate": 1.7392176288341925e-05, "loss": 0.5152, "step": 3187 }, { "epoch": 0.8068843330802329, "grad_norm": 0.1578294336795807, "learning_rate": 1.739056357909267e-05, "loss": 0.5136, "step": 3188 }, { "epoch": 0.8071374335611238, "grad_norm": 0.16467800736427307, "learning_rate": 1.7388950446158083e-05, "loss": 0.5284, "step": 3189 }, { "epoch": 0.8073905340420147, "grad_norm": 0.14969292283058167, "learning_rate": 1.7387336889630645e-05, "loss": 0.535, "step": 3190 }, { "epoch": 0.8076436345229056, "grad_norm": 0.1427565962076187, "learning_rate": 1.738572290960285e-05, "loss": 0.5158, "step": 3191 }, { "epoch": 0.8078967350037966, "grad_norm": 0.14638501405715942, "learning_rate": 1.7384108506167225e-05, "loss": 0.5222, "step": 3192 }, { "epoch": 0.8081498354846874, "grad_norm": 0.14268317818641663, "learning_rate": 1.7382493679416327e-05, "loss": 0.5272, "step": 3193 }, { "epoch": 0.8084029359655783, "grad_norm": 0.1447330117225647, "learning_rate": 1.7380878429442727e-05, "loss": 0.535, "step": 3194 }, { "epoch": 0.8086560364464692, "grad_norm": 0.1458996683359146, "learning_rate": 1.737926275633902e-05, "loss": 0.5487, "step": 3195 }, { "epoch": 0.8089091369273602, "grad_norm": 0.14721274375915527, "learning_rate": 1.7377646660197832e-05, "loss": 0.5311, "step": 3196 }, { "epoch": 0.8091622374082511, "grad_norm": 0.14739663898944855, "learning_rate": 1.7376030141111816e-05, "loss": 0.5212, "step": 3197 }, { "epoch": 0.809415337889142, "grad_norm": 0.17076340317726135, "learning_rate": 1.7374413199173634e-05, "loss": 0.5319, "step": 3198 }, { "epoch": 0.8096684383700329, "grad_norm": 0.14928781986236572, "learning_rate": 1.737279583447599e-05, "loss": 0.557, "step": 3199 }, { "epoch": 0.8099215388509238, "grad_norm": 0.14964807033538818, "learning_rate": 1.7371178047111594e-05, "loss": 0.5267, "step": 3200 }, { "epoch": 0.8101746393318148, "grad_norm": 0.14851973950862885, "learning_rate": 1.73695598371732e-05, "loss": 0.5376, "step": 3201 }, { "epoch": 0.8104277398127057, "grad_norm": 0.15421175956726074, "learning_rate": 1.7367941204753575e-05, "loss": 0.5065, "step": 3202 }, { "epoch": 0.8106808402935965, "grad_norm": 0.1504693329334259, "learning_rate": 1.7366322149945506e-05, "loss": 0.5075, "step": 3203 }, { "epoch": 0.8109339407744874, "grad_norm": 0.15099111199378967, "learning_rate": 1.7364702672841816e-05, "loss": 0.5157, "step": 3204 }, { "epoch": 0.8111870412553784, "grad_norm": 0.16988548636436462, "learning_rate": 1.7363082773535347e-05, "loss": 0.5494, "step": 3205 }, { "epoch": 0.8114401417362693, "grad_norm": 0.1674482673406601, "learning_rate": 1.736146245211896e-05, "loss": 0.5345, "step": 3206 }, { "epoch": 0.8116932422171602, "grad_norm": 0.1654837727546692, "learning_rate": 1.7359841708685543e-05, "loss": 0.5403, "step": 3207 }, { "epoch": 0.8119463426980511, "grad_norm": 0.14797748625278473, "learning_rate": 1.7358220543328014e-05, "loss": 0.5348, "step": 3208 }, { "epoch": 0.812199443178942, "grad_norm": 0.14437085390090942, "learning_rate": 1.735659895613931e-05, "loss": 0.5212, "step": 3209 }, { "epoch": 0.812452543659833, "grad_norm": 0.14685693383216858, "learning_rate": 1.7354976947212395e-05, "loss": 0.5458, "step": 3210 }, { "epoch": 0.8127056441407239, "grad_norm": 0.15425729751586914, "learning_rate": 1.735335451664025e-05, "loss": 0.5454, "step": 3211 }, { "epoch": 0.8129587446216148, "grad_norm": 0.14977982640266418, "learning_rate": 1.735173166451589e-05, "loss": 0.5311, "step": 3212 }, { "epoch": 0.8132118451025057, "grad_norm": 0.14677029848098755, "learning_rate": 1.735010839093235e-05, "loss": 0.5359, "step": 3213 }, { "epoch": 0.8134649455833967, "grad_norm": 0.14466652274131775, "learning_rate": 1.7348484695982684e-05, "loss": 0.5378, "step": 3214 }, { "epoch": 0.8137180460642875, "grad_norm": 0.14722996950149536, "learning_rate": 1.7346860579759984e-05, "loss": 0.5162, "step": 3215 }, { "epoch": 0.8139711465451784, "grad_norm": 0.14407674968242645, "learning_rate": 1.7345236042357346e-05, "loss": 0.529, "step": 3216 }, { "epoch": 0.8142242470260693, "grad_norm": 0.15334564447402954, "learning_rate": 1.734361108386791e-05, "loss": 0.5351, "step": 3217 }, { "epoch": 0.8144773475069602, "grad_norm": 0.14499294757843018, "learning_rate": 1.7341985704384827e-05, "loss": 0.5332, "step": 3218 }, { "epoch": 0.8147304479878512, "grad_norm": 0.14575597643852234, "learning_rate": 1.734035990400128e-05, "loss": 0.5214, "step": 3219 }, { "epoch": 0.8149835484687421, "grad_norm": 0.14648140966892242, "learning_rate": 1.7338733682810468e-05, "loss": 0.5355, "step": 3220 }, { "epoch": 0.815236648949633, "grad_norm": 0.14309579133987427, "learning_rate": 1.733710704090562e-05, "loss": 0.508, "step": 3221 }, { "epoch": 0.8154897494305239, "grad_norm": 0.14522792398929596, "learning_rate": 1.733547997837999e-05, "loss": 0.5297, "step": 3222 }, { "epoch": 0.8157428499114149, "grad_norm": 0.1431078463792801, "learning_rate": 1.7333852495326852e-05, "loss": 0.507, "step": 3223 }, { "epoch": 0.8159959503923058, "grad_norm": 0.1608077734708786, "learning_rate": 1.733222459183951e-05, "loss": 0.5215, "step": 3224 }, { "epoch": 0.8162490508731967, "grad_norm": 0.15187807381153107, "learning_rate": 1.7330596268011283e-05, "loss": 0.5548, "step": 3225 }, { "epoch": 0.8165021513540875, "grad_norm": 0.14093150198459625, "learning_rate": 1.7328967523935525e-05, "loss": 0.517, "step": 3226 }, { "epoch": 0.8167552518349784, "grad_norm": 0.1472300887107849, "learning_rate": 1.7327338359705606e-05, "loss": 0.5161, "step": 3227 }, { "epoch": 0.8170083523158694, "grad_norm": 0.1484699696302414, "learning_rate": 1.7325708775414917e-05, "loss": 0.5565, "step": 3228 }, { "epoch": 0.8172614527967603, "grad_norm": 0.17139586806297302, "learning_rate": 1.7324078771156887e-05, "loss": 0.5184, "step": 3229 }, { "epoch": 0.8175145532776512, "grad_norm": 0.14693978428840637, "learning_rate": 1.7322448347024957e-05, "loss": 0.5205, "step": 3230 }, { "epoch": 0.8177676537585421, "grad_norm": 0.14895689487457275, "learning_rate": 1.7320817503112595e-05, "loss": 0.5273, "step": 3231 }, { "epoch": 0.8180207542394331, "grad_norm": 0.14781410992145538, "learning_rate": 1.7319186239513294e-05, "loss": 0.5613, "step": 3232 }, { "epoch": 0.818273854720324, "grad_norm": 0.14414310455322266, "learning_rate": 1.7317554556320573e-05, "loss": 0.5353, "step": 3233 }, { "epoch": 0.8185269552012149, "grad_norm": 0.15891197323799133, "learning_rate": 1.731592245362797e-05, "loss": 0.5516, "step": 3234 }, { "epoch": 0.8187800556821058, "grad_norm": 0.14879481494426727, "learning_rate": 1.7314289931529055e-05, "loss": 0.5143, "step": 3235 }, { "epoch": 0.8190331561629967, "grad_norm": 0.15288427472114563, "learning_rate": 1.7312656990117413e-05, "loss": 0.5373, "step": 3236 }, { "epoch": 0.8192862566438877, "grad_norm": 0.1443529576063156, "learning_rate": 1.7311023629486653e-05, "loss": 0.5258, "step": 3237 }, { "epoch": 0.8195393571247785, "grad_norm": 0.1469467282295227, "learning_rate": 1.7309389849730423e-05, "loss": 0.5344, "step": 3238 }, { "epoch": 0.8197924576056694, "grad_norm": 0.14776411652565002, "learning_rate": 1.730775565094237e-05, "loss": 0.5424, "step": 3239 }, { "epoch": 0.8200455580865603, "grad_norm": 0.14843793213367462, "learning_rate": 1.7306121033216198e-05, "loss": 0.5512, "step": 3240 }, { "epoch": 0.8202986585674513, "grad_norm": 0.1491561084985733, "learning_rate": 1.7304485996645595e-05, "loss": 0.5534, "step": 3241 }, { "epoch": 0.8205517590483422, "grad_norm": 0.1519213169813156, "learning_rate": 1.730285054132431e-05, "loss": 0.5349, "step": 3242 }, { "epoch": 0.8208048595292331, "grad_norm": 0.14969325065612793, "learning_rate": 1.7301214667346093e-05, "loss": 0.5252, "step": 3243 }, { "epoch": 0.821057960010124, "grad_norm": 0.14714865386486053, "learning_rate": 1.729957837480473e-05, "loss": 0.521, "step": 3244 }, { "epoch": 0.821311060491015, "grad_norm": 0.14812898635864258, "learning_rate": 1.7297941663794016e-05, "loss": 0.5325, "step": 3245 }, { "epoch": 0.8215641609719059, "grad_norm": 0.17630314826965332, "learning_rate": 1.7296304534407794e-05, "loss": 0.5195, "step": 3246 }, { "epoch": 0.8218172614527968, "grad_norm": 0.14810848236083984, "learning_rate": 1.7294666986739908e-05, "loss": 0.5131, "step": 3247 }, { "epoch": 0.8220703619336877, "grad_norm": 0.1505279690027237, "learning_rate": 1.7293029020884236e-05, "loss": 0.535, "step": 3248 }, { "epoch": 0.8223234624145785, "grad_norm": 0.17762263119220734, "learning_rate": 1.7291390636934683e-05, "loss": 0.5364, "step": 3249 }, { "epoch": 0.8225765628954695, "grad_norm": 0.14034011960029602, "learning_rate": 1.728975183498517e-05, "loss": 0.4963, "step": 3250 }, { "epoch": 0.8228296633763604, "grad_norm": 0.14482980966567993, "learning_rate": 1.7288112615129645e-05, "loss": 0.5497, "step": 3251 }, { "epoch": 0.8230827638572513, "grad_norm": 0.1575547158718109, "learning_rate": 1.7286472977462087e-05, "loss": 0.5212, "step": 3252 }, { "epoch": 0.8233358643381422, "grad_norm": 0.1547786146402359, "learning_rate": 1.7284832922076487e-05, "loss": 0.5203, "step": 3253 }, { "epoch": 0.8235889648190332, "grad_norm": 0.1497178077697754, "learning_rate": 1.728319244906687e-05, "loss": 0.5195, "step": 3254 }, { "epoch": 0.8238420652999241, "grad_norm": 0.15711848437786102, "learning_rate": 1.728155155852728e-05, "loss": 0.5324, "step": 3255 }, { "epoch": 0.824095165780815, "grad_norm": 0.1447044163942337, "learning_rate": 1.7279910250551784e-05, "loss": 0.5522, "step": 3256 }, { "epoch": 0.8243482662617059, "grad_norm": 0.14661699533462524, "learning_rate": 1.7278268525234478e-05, "loss": 0.529, "step": 3257 }, { "epoch": 0.8246013667425968, "grad_norm": 0.1551191210746765, "learning_rate": 1.7276626382669476e-05, "loss": 0.5019, "step": 3258 }, { "epoch": 0.8248544672234878, "grad_norm": 0.14299927651882172, "learning_rate": 1.7274983822950916e-05, "loss": 0.5204, "step": 3259 }, { "epoch": 0.8251075677043787, "grad_norm": 0.14080898463726044, "learning_rate": 1.7273340846172967e-05, "loss": 0.4934, "step": 3260 }, { "epoch": 0.8253606681852695, "grad_norm": 0.14775314927101135, "learning_rate": 1.7271697452429816e-05, "loss": 0.5323, "step": 3261 }, { "epoch": 0.8256137686661604, "grad_norm": 0.15185517072677612, "learning_rate": 1.727005364181568e-05, "loss": 0.5701, "step": 3262 }, { "epoch": 0.8258668691470514, "grad_norm": 0.14407968521118164, "learning_rate": 1.726840941442478e-05, "loss": 0.5227, "step": 3263 }, { "epoch": 0.8261199696279423, "grad_norm": 0.15256400406360626, "learning_rate": 1.7266764770351394e-05, "loss": 0.5361, "step": 3264 }, { "epoch": 0.8263730701088332, "grad_norm": 0.16510039567947388, "learning_rate": 1.7265119709689794e-05, "loss": 0.5447, "step": 3265 }, { "epoch": 0.8266261705897241, "grad_norm": 0.24316293001174927, "learning_rate": 1.7263474232534295e-05, "loss": 0.5298, "step": 3266 }, { "epoch": 0.826879271070615, "grad_norm": 0.15302090346813202, "learning_rate": 1.7261828338979226e-05, "loss": 0.5348, "step": 3267 }, { "epoch": 0.827132371551506, "grad_norm": 0.1483800858259201, "learning_rate": 1.726018202911894e-05, "loss": 0.5187, "step": 3268 }, { "epoch": 0.8273854720323969, "grad_norm": 0.14614009857177734, "learning_rate": 1.7258535303047822e-05, "loss": 0.5164, "step": 3269 }, { "epoch": 0.8276385725132878, "grad_norm": 0.14864173531532288, "learning_rate": 1.7256888160860272e-05, "loss": 0.5293, "step": 3270 }, { "epoch": 0.8278916729941787, "grad_norm": 0.14359018206596375, "learning_rate": 1.7255240602650715e-05, "loss": 0.5088, "step": 3271 }, { "epoch": 0.8281447734750697, "grad_norm": 0.14949128031730652, "learning_rate": 1.7253592628513604e-05, "loss": 0.525, "step": 3272 }, { "epoch": 0.8283978739559605, "grad_norm": 0.14719711244106293, "learning_rate": 1.7251944238543418e-05, "loss": 0.5173, "step": 3273 }, { "epoch": 0.8286509744368514, "grad_norm": 0.16194112598896027, "learning_rate": 1.725029543283465e-05, "loss": 0.5512, "step": 3274 }, { "epoch": 0.8289040749177423, "grad_norm": 0.14672024548053741, "learning_rate": 1.724864621148182e-05, "loss": 0.5567, "step": 3275 }, { "epoch": 0.8291571753986332, "grad_norm": 0.14654190838336945, "learning_rate": 1.7246996574579486e-05, "loss": 0.5086, "step": 3276 }, { "epoch": 0.8294102758795242, "grad_norm": 0.14794012904167175, "learning_rate": 1.7245346522222207e-05, "loss": 0.5252, "step": 3277 }, { "epoch": 0.8296633763604151, "grad_norm": 0.14955581724643707, "learning_rate": 1.7243696054504583e-05, "loss": 0.5419, "step": 3278 }, { "epoch": 0.829916476841306, "grad_norm": 0.15737499296665192, "learning_rate": 1.724204517152123e-05, "loss": 0.5238, "step": 3279 }, { "epoch": 0.8301695773221969, "grad_norm": 0.14875125885009766, "learning_rate": 1.7240393873366784e-05, "loss": 0.5243, "step": 3280 }, { "epoch": 0.8304226778030879, "grad_norm": 0.14579667150974274, "learning_rate": 1.7238742160135922e-05, "loss": 0.5218, "step": 3281 }, { "epoch": 0.8306757782839788, "grad_norm": 0.1460975706577301, "learning_rate": 1.7237090031923324e-05, "loss": 0.5135, "step": 3282 }, { "epoch": 0.8309288787648696, "grad_norm": 0.14335250854492188, "learning_rate": 1.723543748882371e-05, "loss": 0.5198, "step": 3283 }, { "epoch": 0.8311819792457605, "grad_norm": 0.14790262281894684, "learning_rate": 1.723378453093181e-05, "loss": 0.5373, "step": 3284 }, { "epoch": 0.8314350797266514, "grad_norm": 0.16086900234222412, "learning_rate": 1.7232131158342386e-05, "loss": 0.5271, "step": 3285 }, { "epoch": 0.8316881802075424, "grad_norm": 0.15057741105556488, "learning_rate": 1.7230477371150224e-05, "loss": 0.5109, "step": 3286 }, { "epoch": 0.8319412806884333, "grad_norm": 0.15472213923931122, "learning_rate": 1.7228823169450137e-05, "loss": 0.5213, "step": 3287 }, { "epoch": 0.8321943811693242, "grad_norm": 0.15323445200920105, "learning_rate": 1.7227168553336947e-05, "loss": 0.5177, "step": 3288 }, { "epoch": 0.8324474816502151, "grad_norm": 0.16149663925170898, "learning_rate": 1.7225513522905512e-05, "loss": 0.5255, "step": 3289 }, { "epoch": 0.8327005821311061, "grad_norm": 0.14462552964687347, "learning_rate": 1.7223858078250718e-05, "loss": 0.502, "step": 3290 }, { "epoch": 0.832953682611997, "grad_norm": 0.14670826494693756, "learning_rate": 1.7222202219467465e-05, "loss": 0.5492, "step": 3291 }, { "epoch": 0.8332067830928879, "grad_norm": 0.1441624015569687, "learning_rate": 1.7220545946650675e-05, "loss": 0.5432, "step": 3292 }, { "epoch": 0.8334598835737788, "grad_norm": 0.15543736517429352, "learning_rate": 1.7218889259895308e-05, "loss": 0.5281, "step": 3293 }, { "epoch": 0.8337129840546698, "grad_norm": 0.1510584056377411, "learning_rate": 1.721723215929633e-05, "loss": 0.5099, "step": 3294 }, { "epoch": 0.8339660845355606, "grad_norm": 0.14710479974746704, "learning_rate": 1.7215574644948742e-05, "loss": 0.4992, "step": 3295 }, { "epoch": 0.8342191850164515, "grad_norm": 0.1451963633298874, "learning_rate": 1.721391671694756e-05, "loss": 0.5309, "step": 3296 }, { "epoch": 0.8344722854973424, "grad_norm": 0.1436157077550888, "learning_rate": 1.721225837538784e-05, "loss": 0.5383, "step": 3297 }, { "epoch": 0.8347253859782333, "grad_norm": 0.14644017815589905, "learning_rate": 1.721059962036465e-05, "loss": 0.5557, "step": 3298 }, { "epoch": 0.8349784864591243, "grad_norm": 0.15466825664043427, "learning_rate": 1.7208940451973074e-05, "loss": 0.5359, "step": 3299 }, { "epoch": 0.8352315869400152, "grad_norm": 0.14470620453357697, "learning_rate": 1.7207280870308233e-05, "loss": 0.5146, "step": 3300 }, { "epoch": 0.8354846874209061, "grad_norm": 0.15013274550437927, "learning_rate": 1.720562087546527e-05, "loss": 0.5022, "step": 3301 }, { "epoch": 0.835737787901797, "grad_norm": 0.1453494131565094, "learning_rate": 1.7203960467539348e-05, "loss": 0.5232, "step": 3302 }, { "epoch": 0.835990888382688, "grad_norm": 0.15376313030719757, "learning_rate": 1.7202299646625653e-05, "loss": 0.5572, "step": 3303 }, { "epoch": 0.8362439888635789, "grad_norm": 0.15615098178386688, "learning_rate": 1.7200638412819396e-05, "loss": 0.5075, "step": 3304 }, { "epoch": 0.8364970893444698, "grad_norm": 0.14668036997318268, "learning_rate": 1.7198976766215813e-05, "loss": 0.5481, "step": 3305 }, { "epoch": 0.8367501898253606, "grad_norm": 0.14969007670879364, "learning_rate": 1.7197314706910164e-05, "loss": 0.5528, "step": 3306 }, { "epoch": 0.8370032903062515, "grad_norm": 0.18027009069919586, "learning_rate": 1.719565223499773e-05, "loss": 0.502, "step": 3307 }, { "epoch": 0.8372563907871425, "grad_norm": 0.1496511697769165, "learning_rate": 1.719398935057382e-05, "loss": 0.5427, "step": 3308 }, { "epoch": 0.8375094912680334, "grad_norm": 0.1542242020368576, "learning_rate": 1.7192326053733757e-05, "loss": 0.5389, "step": 3309 }, { "epoch": 0.8377625917489243, "grad_norm": 0.14819049835205078, "learning_rate": 1.71906623445729e-05, "loss": 0.5316, "step": 3310 }, { "epoch": 0.8380156922298152, "grad_norm": 0.1437612771987915, "learning_rate": 1.718899822318662e-05, "loss": 0.5434, "step": 3311 }, { "epoch": 0.8382687927107062, "grad_norm": 0.18938961625099182, "learning_rate": 1.7187333689670324e-05, "loss": 0.5158, "step": 3312 }, { "epoch": 0.8385218931915971, "grad_norm": 0.14875753223896027, "learning_rate": 1.7185668744119433e-05, "loss": 0.5239, "step": 3313 }, { "epoch": 0.838774993672488, "grad_norm": 0.17988501489162445, "learning_rate": 1.7184003386629397e-05, "loss": 0.5066, "step": 3314 }, { "epoch": 0.8390280941533789, "grad_norm": 0.14523835480213165, "learning_rate": 1.7182337617295685e-05, "loss": 0.5136, "step": 3315 }, { "epoch": 0.8392811946342698, "grad_norm": 0.16122743487358093, "learning_rate": 1.7180671436213793e-05, "loss": 0.517, "step": 3316 }, { "epoch": 0.8395342951151608, "grad_norm": 0.15069933235645294, "learning_rate": 1.717900484347924e-05, "loss": 0.5515, "step": 3317 }, { "epoch": 0.8397873955960516, "grad_norm": 0.14569856226444244, "learning_rate": 1.7177337839187566e-05, "loss": 0.5173, "step": 3318 }, { "epoch": 0.8400404960769425, "grad_norm": 0.1768755167722702, "learning_rate": 1.7175670423434342e-05, "loss": 0.5326, "step": 3319 }, { "epoch": 0.8402935965578334, "grad_norm": 0.14865732192993164, "learning_rate": 1.7174002596315153e-05, "loss": 0.544, "step": 3320 }, { "epoch": 0.8405466970387244, "grad_norm": 0.1527811735868454, "learning_rate": 1.717233435792561e-05, "loss": 0.547, "step": 3321 }, { "epoch": 0.8407997975196153, "grad_norm": 0.14593443274497986, "learning_rate": 1.7170665708361357e-05, "loss": 0.5048, "step": 3322 }, { "epoch": 0.8410528980005062, "grad_norm": 0.1512899398803711, "learning_rate": 1.7168996647718045e-05, "loss": 0.5298, "step": 3323 }, { "epoch": 0.8413059984813971, "grad_norm": 0.14689087867736816, "learning_rate": 1.7167327176091365e-05, "loss": 0.5352, "step": 3324 }, { "epoch": 0.841559098962288, "grad_norm": 0.1569533348083496, "learning_rate": 1.716565729357702e-05, "loss": 0.5072, "step": 3325 }, { "epoch": 0.841812199443179, "grad_norm": 0.14808864891529083, "learning_rate": 1.7163987000270747e-05, "loss": 0.5568, "step": 3326 }, { "epoch": 0.8420652999240699, "grad_norm": 0.15098944306373596, "learning_rate": 1.716231629626829e-05, "loss": 0.5224, "step": 3327 }, { "epoch": 0.8423184004049608, "grad_norm": 0.14621658623218536, "learning_rate": 1.7160645181665432e-05, "loss": 0.5386, "step": 3328 }, { "epoch": 0.8425715008858516, "grad_norm": 0.1677350252866745, "learning_rate": 1.715897365655798e-05, "loss": 0.5322, "step": 3329 }, { "epoch": 0.8428246013667426, "grad_norm": 0.26365259289741516, "learning_rate": 1.7157301721041752e-05, "loss": 0.5288, "step": 3330 }, { "epoch": 0.8430777018476335, "grad_norm": 0.14822180569171906, "learning_rate": 1.7155629375212602e-05, "loss": 0.5216, "step": 3331 }, { "epoch": 0.8433308023285244, "grad_norm": 0.14585661888122559, "learning_rate": 1.7153956619166395e-05, "loss": 0.5387, "step": 3332 }, { "epoch": 0.8435839028094153, "grad_norm": 0.1485254466533661, "learning_rate": 1.7152283452999033e-05, "loss": 0.5345, "step": 3333 }, { "epoch": 0.8438370032903062, "grad_norm": 0.1552872657775879, "learning_rate": 1.715060987680643e-05, "loss": 0.5132, "step": 3334 }, { "epoch": 0.8440901037711972, "grad_norm": 0.14705923199653625, "learning_rate": 1.714893589068453e-05, "loss": 0.5186, "step": 3335 }, { "epoch": 0.8443432042520881, "grad_norm": 0.1518663763999939, "learning_rate": 1.7147261494729303e-05, "loss": 0.5358, "step": 3336 }, { "epoch": 0.844596304732979, "grad_norm": 0.1611744910478592, "learning_rate": 1.7145586689036734e-05, "loss": 0.5435, "step": 3337 }, { "epoch": 0.8448494052138699, "grad_norm": 0.14860674738883972, "learning_rate": 1.7143911473702837e-05, "loss": 0.5276, "step": 3338 }, { "epoch": 0.8451025056947609, "grad_norm": 0.1732797622680664, "learning_rate": 1.714223584882365e-05, "loss": 0.5404, "step": 3339 }, { "epoch": 0.8453556061756518, "grad_norm": 0.1530172973871231, "learning_rate": 1.714055981449523e-05, "loss": 0.5241, "step": 3340 }, { "epoch": 0.8456087066565426, "grad_norm": 0.1485951542854309, "learning_rate": 1.7138883370813667e-05, "loss": 0.5515, "step": 3341 }, { "epoch": 0.8458618071374335, "grad_norm": 0.15209129452705383, "learning_rate": 1.7137206517875062e-05, "loss": 0.5398, "step": 3342 }, { "epoch": 0.8461149076183245, "grad_norm": 0.15377309918403625, "learning_rate": 1.7135529255775546e-05, "loss": 0.5234, "step": 3343 }, { "epoch": 0.8463680080992154, "grad_norm": 0.14883680641651154, "learning_rate": 1.7133851584611276e-05, "loss": 0.514, "step": 3344 }, { "epoch": 0.8466211085801063, "grad_norm": 0.16214464604854584, "learning_rate": 1.7132173504478425e-05, "loss": 0.5718, "step": 3345 }, { "epoch": 0.8468742090609972, "grad_norm": 0.15174496173858643, "learning_rate": 1.7130495015473196e-05, "loss": 0.523, "step": 3346 }, { "epoch": 0.8471273095418881, "grad_norm": 0.14569640159606934, "learning_rate": 1.7128816117691814e-05, "loss": 0.5099, "step": 3347 }, { "epoch": 0.8473804100227791, "grad_norm": 0.15412220358848572, "learning_rate": 1.7127136811230527e-05, "loss": 0.5391, "step": 3348 }, { "epoch": 0.84763351050367, "grad_norm": 0.14210274815559387, "learning_rate": 1.7125457096185605e-05, "loss": 0.5132, "step": 3349 }, { "epoch": 0.8478866109845609, "grad_norm": 0.14505943655967712, "learning_rate": 1.7123776972653342e-05, "loss": 0.508, "step": 3350 }, { "epoch": 0.8481397114654518, "grad_norm": 0.15040002763271332, "learning_rate": 1.7122096440730052e-05, "loss": 0.5218, "step": 3351 }, { "epoch": 0.8483928119463428, "grad_norm": 0.14346162974834442, "learning_rate": 1.7120415500512088e-05, "loss": 0.4979, "step": 3352 }, { "epoch": 0.8486459124272336, "grad_norm": 0.17275333404541016, "learning_rate": 1.7118734152095803e-05, "loss": 0.551, "step": 3353 }, { "epoch": 0.8488990129081245, "grad_norm": 0.14918582141399384, "learning_rate": 1.711705239557759e-05, "loss": 0.5649, "step": 3354 }, { "epoch": 0.8491521133890154, "grad_norm": 0.14397351443767548, "learning_rate": 1.7115370231053864e-05, "loss": 0.511, "step": 3355 }, { "epoch": 0.8494052138699063, "grad_norm": 0.14942991733551025, "learning_rate": 1.7113687658621052e-05, "loss": 0.527, "step": 3356 }, { "epoch": 0.8496583143507973, "grad_norm": 0.1508914977312088, "learning_rate": 1.711200467837562e-05, "loss": 0.5517, "step": 3357 }, { "epoch": 0.8499114148316882, "grad_norm": 0.15089473128318787, "learning_rate": 1.7110321290414044e-05, "loss": 0.528, "step": 3358 }, { "epoch": 0.8501645153125791, "grad_norm": 0.3767695128917694, "learning_rate": 1.7108637494832828e-05, "loss": 0.5304, "step": 3359 }, { "epoch": 0.85041761579347, "grad_norm": 0.1531301587820053, "learning_rate": 1.7106953291728507e-05, "loss": 0.4992, "step": 3360 }, { "epoch": 0.850670716274361, "grad_norm": 0.1518569141626358, "learning_rate": 1.7105268681197628e-05, "loss": 0.5077, "step": 3361 }, { "epoch": 0.8509238167552519, "grad_norm": 0.1432340443134308, "learning_rate": 1.7103583663336766e-05, "loss": 0.5153, "step": 3362 }, { "epoch": 0.8511769172361427, "grad_norm": 0.1390077918767929, "learning_rate": 1.7101898238242525e-05, "loss": 0.507, "step": 3363 }, { "epoch": 0.8514300177170336, "grad_norm": 0.16909977793693542, "learning_rate": 1.7100212406011524e-05, "loss": 0.5703, "step": 3364 }, { "epoch": 0.8516831181979245, "grad_norm": 0.15069939196109772, "learning_rate": 1.70985261667404e-05, "loss": 0.5155, "step": 3365 }, { "epoch": 0.8519362186788155, "grad_norm": 0.14972269535064697, "learning_rate": 1.7096839520525838e-05, "loss": 0.5285, "step": 3366 }, { "epoch": 0.8521893191597064, "grad_norm": 0.1468164324760437, "learning_rate": 1.709515246746451e-05, "loss": 0.5289, "step": 3367 }, { "epoch": 0.8524424196405973, "grad_norm": 0.14484484493732452, "learning_rate": 1.709346500765315e-05, "loss": 0.5252, "step": 3368 }, { "epoch": 0.8526955201214882, "grad_norm": 0.14705991744995117, "learning_rate": 1.7091777141188485e-05, "loss": 0.5301, "step": 3369 }, { "epoch": 0.8529486206023792, "grad_norm": 0.15270598232746124, "learning_rate": 1.7090088868167278e-05, "loss": 0.551, "step": 3370 }, { "epoch": 0.8532017210832701, "grad_norm": 0.14679788053035736, "learning_rate": 1.7088400188686317e-05, "loss": 0.569, "step": 3371 }, { "epoch": 0.853454821564161, "grad_norm": 0.15219935774803162, "learning_rate": 1.708671110284241e-05, "loss": 0.5224, "step": 3372 }, { "epoch": 0.8537079220450519, "grad_norm": 0.1575467735528946, "learning_rate": 1.708502161073239e-05, "loss": 0.5049, "step": 3373 }, { "epoch": 0.8539610225259427, "grad_norm": 0.14440415799617767, "learning_rate": 1.7083331712453108e-05, "loss": 0.5304, "step": 3374 }, { "epoch": 0.8542141230068337, "grad_norm": 0.14947082102298737, "learning_rate": 1.7081641408101444e-05, "loss": 0.5362, "step": 3375 }, { "epoch": 0.8544672234877246, "grad_norm": 0.14875511825084686, "learning_rate": 1.7079950697774303e-05, "loss": 0.5192, "step": 3376 }, { "epoch": 0.8547203239686155, "grad_norm": 0.14965280890464783, "learning_rate": 1.7078259581568603e-05, "loss": 0.5293, "step": 3377 }, { "epoch": 0.8549734244495064, "grad_norm": 0.15363718569278717, "learning_rate": 1.7076568059581298e-05, "loss": 0.5452, "step": 3378 }, { "epoch": 0.8552265249303974, "grad_norm": 0.15703356266021729, "learning_rate": 1.7074876131909355e-05, "loss": 0.5243, "step": 3379 }, { "epoch": 0.8554796254112883, "grad_norm": 0.14936314523220062, "learning_rate": 1.7073183798649773e-05, "loss": 0.5293, "step": 3380 }, { "epoch": 0.8557327258921792, "grad_norm": 0.16604667901992798, "learning_rate": 1.7071491059899567e-05, "loss": 0.5686, "step": 3381 }, { "epoch": 0.8559858263730701, "grad_norm": 0.14733180403709412, "learning_rate": 1.706979791575578e-05, "loss": 0.5176, "step": 3382 }, { "epoch": 0.856238926853961, "grad_norm": 0.14883117377758026, "learning_rate": 1.7068104366315476e-05, "loss": 0.5362, "step": 3383 }, { "epoch": 0.856492027334852, "grad_norm": 0.17202907800674438, "learning_rate": 1.706641041167574e-05, "loss": 0.539, "step": 3384 }, { "epoch": 0.8567451278157429, "grad_norm": 0.15562129020690918, "learning_rate": 1.7064716051933684e-05, "loss": 0.5317, "step": 3385 }, { "epoch": 0.8569982282966337, "grad_norm": 0.15075981616973877, "learning_rate": 1.7063021287186443e-05, "loss": 0.5421, "step": 3386 }, { "epoch": 0.8572513287775246, "grad_norm": 0.15465915203094482, "learning_rate": 1.7061326117531175e-05, "loss": 0.531, "step": 3387 }, { "epoch": 0.8575044292584156, "grad_norm": 0.14956246316432953, "learning_rate": 1.7059630543065058e-05, "loss": 0.5317, "step": 3388 }, { "epoch": 0.8577575297393065, "grad_norm": 0.15158401429653168, "learning_rate": 1.7057934563885298e-05, "loss": 0.5141, "step": 3389 }, { "epoch": 0.8580106302201974, "grad_norm": 0.14342811703681946, "learning_rate": 1.7056238180089123e-05, "loss": 0.5054, "step": 3390 }, { "epoch": 0.8582637307010883, "grad_norm": 0.1439913660287857, "learning_rate": 1.7054541391773778e-05, "loss": 0.5372, "step": 3391 }, { "epoch": 0.8585168311819793, "grad_norm": 0.14679889380931854, "learning_rate": 1.705284419903654e-05, "loss": 0.5523, "step": 3392 }, { "epoch": 0.8587699316628702, "grad_norm": 0.1553446352481842, "learning_rate": 1.7051146601974707e-05, "loss": 0.498, "step": 3393 }, { "epoch": 0.8590230321437611, "grad_norm": 0.14746935665607452, "learning_rate": 1.7049448600685593e-05, "loss": 0.5152, "step": 3394 }, { "epoch": 0.859276132624652, "grad_norm": 0.15107740461826324, "learning_rate": 1.704775019526655e-05, "loss": 0.533, "step": 3395 }, { "epoch": 0.8595292331055429, "grad_norm": 0.1483919769525528, "learning_rate": 1.7046051385814934e-05, "loss": 0.5497, "step": 3396 }, { "epoch": 0.8597823335864339, "grad_norm": 0.14124512672424316, "learning_rate": 1.7044352172428137e-05, "loss": 0.5559, "step": 3397 }, { "epoch": 0.8600354340673247, "grad_norm": 0.14931374788284302, "learning_rate": 1.704265255520358e-05, "loss": 0.5402, "step": 3398 }, { "epoch": 0.8602885345482156, "grad_norm": 0.14312680065631866, "learning_rate": 1.7040952534238683e-05, "loss": 0.5381, "step": 3399 }, { "epoch": 0.8605416350291065, "grad_norm": 0.15884354710578918, "learning_rate": 1.7039252109630915e-05, "loss": 0.5195, "step": 3400 }, { "epoch": 0.8607947355099975, "grad_norm": 0.15197265148162842, "learning_rate": 1.7037551281477756e-05, "loss": 0.4994, "step": 3401 }, { "epoch": 0.8610478359908884, "grad_norm": 0.1559198796749115, "learning_rate": 1.7035850049876712e-05, "loss": 0.512, "step": 3402 }, { "epoch": 0.8613009364717793, "grad_norm": 0.1487860530614853, "learning_rate": 1.7034148414925308e-05, "loss": 0.5242, "step": 3403 }, { "epoch": 0.8615540369526702, "grad_norm": 0.14872916042804718, "learning_rate": 1.7032446376721097e-05, "loss": 0.5138, "step": 3404 }, { "epoch": 0.8618071374335611, "grad_norm": 0.16058118641376495, "learning_rate": 1.7030743935361652e-05, "loss": 0.5241, "step": 3405 }, { "epoch": 0.8620602379144521, "grad_norm": 0.15902970731258392, "learning_rate": 1.702904109094457e-05, "loss": 0.5281, "step": 3406 }, { "epoch": 0.862313338395343, "grad_norm": 0.15066573023796082, "learning_rate": 1.7027337843567478e-05, "loss": 0.5462, "step": 3407 }, { "epoch": 0.8625664388762339, "grad_norm": 0.15071547031402588, "learning_rate": 1.7025634193328008e-05, "loss": 0.5336, "step": 3408 }, { "epoch": 0.8628195393571247, "grad_norm": 0.14600256085395813, "learning_rate": 1.7023930140323835e-05, "loss": 0.5374, "step": 3409 }, { "epoch": 0.8630726398380157, "grad_norm": 0.1508922576904297, "learning_rate": 1.7022225684652648e-05, "loss": 0.5232, "step": 3410 }, { "epoch": 0.8633257403189066, "grad_norm": 0.1466078758239746, "learning_rate": 1.7020520826412156e-05, "loss": 0.5264, "step": 3411 }, { "epoch": 0.8635788407997975, "grad_norm": 0.1576623171567917, "learning_rate": 1.70188155657001e-05, "loss": 0.5064, "step": 3412 }, { "epoch": 0.8638319412806884, "grad_norm": 1.3963921070098877, "learning_rate": 1.7017109902614234e-05, "loss": 0.5552, "step": 3413 }, { "epoch": 0.8640850417615793, "grad_norm": 0.15045902132987976, "learning_rate": 1.701540383725234e-05, "loss": 0.5103, "step": 3414 }, { "epoch": 0.8643381422424703, "grad_norm": 0.147593155503273, "learning_rate": 1.701369736971223e-05, "loss": 0.5332, "step": 3415 }, { "epoch": 0.8645912427233612, "grad_norm": 0.14418303966522217, "learning_rate": 1.7011990500091723e-05, "loss": 0.5234, "step": 3416 }, { "epoch": 0.8648443432042521, "grad_norm": 0.14922396838665009, "learning_rate": 1.701028322848868e-05, "loss": 0.5325, "step": 3417 }, { "epoch": 0.865097443685143, "grad_norm": 0.1431153565645218, "learning_rate": 1.7008575555000968e-05, "loss": 0.5187, "step": 3418 }, { "epoch": 0.865350544166034, "grad_norm": 0.14386627078056335, "learning_rate": 1.7006867479726486e-05, "loss": 0.5331, "step": 3419 }, { "epoch": 0.8656036446469249, "grad_norm": 0.14942172169685364, "learning_rate": 1.7005159002763153e-05, "loss": 0.5147, "step": 3420 }, { "epoch": 0.8658567451278157, "grad_norm": 0.14960147440433502, "learning_rate": 1.7003450124208915e-05, "loss": 0.5402, "step": 3421 }, { "epoch": 0.8661098456087066, "grad_norm": 0.15860414505004883, "learning_rate": 1.700174084416174e-05, "loss": 0.4954, "step": 3422 }, { "epoch": 0.8663629460895975, "grad_norm": 0.15297777950763702, "learning_rate": 1.700003116271961e-05, "loss": 0.5221, "step": 3423 }, { "epoch": 0.8666160465704885, "grad_norm": 0.1475251168012619, "learning_rate": 1.6998321079980548e-05, "loss": 0.5191, "step": 3424 }, { "epoch": 0.8668691470513794, "grad_norm": 0.14576828479766846, "learning_rate": 1.699661059604258e-05, "loss": 0.5193, "step": 3425 }, { "epoch": 0.8671222475322703, "grad_norm": 0.1448698192834854, "learning_rate": 1.699489971100377e-05, "loss": 0.5347, "step": 3426 }, { "epoch": 0.8673753480131612, "grad_norm": 0.14953352510929108, "learning_rate": 1.6993188424962195e-05, "loss": 0.5475, "step": 3427 }, { "epoch": 0.8676284484940522, "grad_norm": 0.1521630883216858, "learning_rate": 1.699147673801596e-05, "loss": 0.5027, "step": 3428 }, { "epoch": 0.8678815489749431, "grad_norm": 0.14906881749629974, "learning_rate": 1.69897646502632e-05, "loss": 0.5631, "step": 3429 }, { "epoch": 0.868134649455834, "grad_norm": 0.1829783320426941, "learning_rate": 1.6988052161802056e-05, "loss": 0.5442, "step": 3430 }, { "epoch": 0.8683877499367249, "grad_norm": 0.14839577674865723, "learning_rate": 1.6986339272730704e-05, "loss": 0.5088, "step": 3431 }, { "epoch": 0.8686408504176157, "grad_norm": 0.1514139473438263, "learning_rate": 1.6984625983147346e-05, "loss": 0.5364, "step": 3432 }, { "epoch": 0.8688939508985067, "grad_norm": 0.1505652517080307, "learning_rate": 1.698291229315019e-05, "loss": 0.5424, "step": 3433 }, { "epoch": 0.8691470513793976, "grad_norm": 0.150529146194458, "learning_rate": 1.698119820283749e-05, "loss": 0.5398, "step": 3434 }, { "epoch": 0.8694001518602885, "grad_norm": 0.15734519064426422, "learning_rate": 1.6979483712307504e-05, "loss": 0.5216, "step": 3435 }, { "epoch": 0.8696532523411794, "grad_norm": 0.15409469604492188, "learning_rate": 1.697776882165852e-05, "loss": 0.5326, "step": 3436 }, { "epoch": 0.8699063528220704, "grad_norm": 0.14382171630859375, "learning_rate": 1.6976053530988857e-05, "loss": 0.5117, "step": 3437 }, { "epoch": 0.8701594533029613, "grad_norm": 0.15217280387878418, "learning_rate": 1.6974337840396836e-05, "loss": 0.5507, "step": 3438 }, { "epoch": 0.8704125537838522, "grad_norm": 0.14626486599445343, "learning_rate": 1.6972621749980822e-05, "loss": 0.522, "step": 3439 }, { "epoch": 0.8706656542647431, "grad_norm": 0.14933772385120392, "learning_rate": 1.69709052598392e-05, "loss": 0.5086, "step": 3440 }, { "epoch": 0.8709187547456341, "grad_norm": 0.1597888320684433, "learning_rate": 1.696918837007036e-05, "loss": 0.5405, "step": 3441 }, { "epoch": 0.871171855226525, "grad_norm": 0.18638035655021667, "learning_rate": 1.6967471080772734e-05, "loss": 0.5286, "step": 3442 }, { "epoch": 0.8714249557074158, "grad_norm": 0.14873693883419037, "learning_rate": 1.6965753392044772e-05, "loss": 0.5144, "step": 3443 }, { "epoch": 0.8716780561883067, "grad_norm": 0.15111809968948364, "learning_rate": 1.6964035303984944e-05, "loss": 0.5011, "step": 3444 }, { "epoch": 0.8719311566691976, "grad_norm": 0.14639481902122498, "learning_rate": 1.6962316816691745e-05, "loss": 0.5235, "step": 3445 }, { "epoch": 0.8721842571500886, "grad_norm": 0.15030495822429657, "learning_rate": 1.6960597930263692e-05, "loss": 0.5344, "step": 3446 }, { "epoch": 0.8724373576309795, "grad_norm": 0.1590227335691452, "learning_rate": 1.6958878644799326e-05, "loss": 0.5219, "step": 3447 }, { "epoch": 0.8726904581118704, "grad_norm": 0.15427719056606293, "learning_rate": 1.6957158960397207e-05, "loss": 0.5118, "step": 3448 }, { "epoch": 0.8729435585927613, "grad_norm": 0.14708839356899261, "learning_rate": 1.6955438877155923e-05, "loss": 0.5465, "step": 3449 }, { "epoch": 0.8731966590736523, "grad_norm": 0.15478238463401794, "learning_rate": 1.6953718395174083e-05, "loss": 0.5647, "step": 3450 }, { "epoch": 0.8734497595545432, "grad_norm": 0.14843030273914337, "learning_rate": 1.6951997514550318e-05, "loss": 0.498, "step": 3451 }, { "epoch": 0.8737028600354341, "grad_norm": 0.14159445464611053, "learning_rate": 1.6950276235383277e-05, "loss": 0.5286, "step": 3452 }, { "epoch": 0.873955960516325, "grad_norm": 0.1486474871635437, "learning_rate": 1.694855455777165e-05, "loss": 0.5433, "step": 3453 }, { "epoch": 0.8742090609972158, "grad_norm": 0.14780612289905548, "learning_rate": 1.694683248181413e-05, "loss": 0.5018, "step": 3454 }, { "epoch": 0.8744621614781068, "grad_norm": 0.1476944535970688, "learning_rate": 1.6945110007609434e-05, "loss": 0.5325, "step": 3455 }, { "epoch": 0.8747152619589977, "grad_norm": 0.14772316813468933, "learning_rate": 1.6943387135256314e-05, "loss": 0.5074, "step": 3456 }, { "epoch": 0.8749683624398886, "grad_norm": 0.14985302090644836, "learning_rate": 1.694166386485354e-05, "loss": 0.5572, "step": 3457 }, { "epoch": 0.8752214629207795, "grad_norm": 0.14288417994976044, "learning_rate": 1.6939940196499904e-05, "loss": 0.525, "step": 3458 }, { "epoch": 0.8754745634016705, "grad_norm": 0.15138815343379974, "learning_rate": 1.6938216130294217e-05, "loss": 0.5268, "step": 3459 }, { "epoch": 0.8757276638825614, "grad_norm": 0.1470714509487152, "learning_rate": 1.6936491666335315e-05, "loss": 0.5145, "step": 3460 }, { "epoch": 0.8759807643634523, "grad_norm": 0.166818305850029, "learning_rate": 1.6934766804722062e-05, "loss": 0.5407, "step": 3461 }, { "epoch": 0.8762338648443432, "grad_norm": 0.14688996970653534, "learning_rate": 1.6933041545553336e-05, "loss": 0.5394, "step": 3462 }, { "epoch": 0.8764869653252341, "grad_norm": 0.15566755831241608, "learning_rate": 1.6931315888928047e-05, "loss": 0.5442, "step": 3463 }, { "epoch": 0.8767400658061251, "grad_norm": 0.16220787167549133, "learning_rate": 1.6929589834945118e-05, "loss": 0.5196, "step": 3464 }, { "epoch": 0.876993166287016, "grad_norm": 0.15492060780525208, "learning_rate": 1.6927863383703506e-05, "loss": 0.5181, "step": 3465 }, { "epoch": 0.8772462667679068, "grad_norm": 0.15213742852210999, "learning_rate": 1.692613653530218e-05, "loss": 0.519, "step": 3466 }, { "epoch": 0.8774993672487977, "grad_norm": 0.16015225648880005, "learning_rate": 1.6924409289840137e-05, "loss": 0.515, "step": 3467 }, { "epoch": 0.8777524677296887, "grad_norm": 0.15063226222991943, "learning_rate": 1.6922681647416404e-05, "loss": 0.5274, "step": 3468 }, { "epoch": 0.8780055682105796, "grad_norm": 0.14600348472595215, "learning_rate": 1.692095360813001e-05, "loss": 0.5058, "step": 3469 }, { "epoch": 0.8782586686914705, "grad_norm": 0.144175186753273, "learning_rate": 1.6919225172080033e-05, "loss": 0.4963, "step": 3470 }, { "epoch": 0.8785117691723614, "grad_norm": 0.15371057391166687, "learning_rate": 1.6917496339365547e-05, "loss": 0.5396, "step": 3471 }, { "epoch": 0.8787648696532523, "grad_norm": 0.1446683406829834, "learning_rate": 1.6915767110085675e-05, "loss": 0.5455, "step": 3472 }, { "epoch": 0.8790179701341433, "grad_norm": 0.14825096726417542, "learning_rate": 1.6914037484339544e-05, "loss": 0.5222, "step": 3473 }, { "epoch": 0.8792710706150342, "grad_norm": 0.1521778106689453, "learning_rate": 1.6912307462226306e-05, "loss": 0.5532, "step": 3474 }, { "epoch": 0.8795241710959251, "grad_norm": 0.14979393780231476, "learning_rate": 1.691057704384515e-05, "loss": 0.5484, "step": 3475 }, { "epoch": 0.879777271576816, "grad_norm": 0.15452872216701508, "learning_rate": 1.6908846229295267e-05, "loss": 0.516, "step": 3476 }, { "epoch": 0.880030372057707, "grad_norm": 0.15019750595092773, "learning_rate": 1.6907115018675884e-05, "loss": 0.5342, "step": 3477 }, { "epoch": 0.8802834725385978, "grad_norm": 0.15492092072963715, "learning_rate": 1.690538341208625e-05, "loss": 0.5472, "step": 3478 }, { "epoch": 0.8805365730194887, "grad_norm": 0.22255095839500427, "learning_rate": 1.690365140962564e-05, "loss": 0.5288, "step": 3479 }, { "epoch": 0.8807896735003796, "grad_norm": 0.15164735913276672, "learning_rate": 1.6901919011393332e-05, "loss": 0.5284, "step": 3480 }, { "epoch": 0.8810427739812705, "grad_norm": 0.1517164558172226, "learning_rate": 1.6900186217488648e-05, "loss": 0.5831, "step": 3481 }, { "epoch": 0.8812958744621615, "grad_norm": 0.14784644544124603, "learning_rate": 1.6898453028010925e-05, "loss": 0.5238, "step": 3482 }, { "epoch": 0.8815489749430524, "grad_norm": 0.17462372779846191, "learning_rate": 1.6896719443059525e-05, "loss": 0.4889, "step": 3483 }, { "epoch": 0.8818020754239433, "grad_norm": 0.14936992526054382, "learning_rate": 1.6894985462733827e-05, "loss": 0.5319, "step": 3484 }, { "epoch": 0.8820551759048342, "grad_norm": 0.15361931920051575, "learning_rate": 1.689325108713324e-05, "loss": 0.517, "step": 3485 }, { "epoch": 0.8823082763857252, "grad_norm": 0.1460724174976349, "learning_rate": 1.689151631635719e-05, "loss": 0.5471, "step": 3486 }, { "epoch": 0.8825613768666161, "grad_norm": 0.14905647933483124, "learning_rate": 1.6889781150505127e-05, "loss": 0.5226, "step": 3487 }, { "epoch": 0.882814477347507, "grad_norm": 0.14525733888149261, "learning_rate": 1.6888045589676526e-05, "loss": 0.5362, "step": 3488 }, { "epoch": 0.8830675778283978, "grad_norm": 0.14851044118404388, "learning_rate": 1.6886309633970882e-05, "loss": 0.5343, "step": 3489 }, { "epoch": 0.8833206783092888, "grad_norm": 0.15067359805107117, "learning_rate": 1.6884573283487718e-05, "loss": 0.5401, "step": 3490 }, { "epoch": 0.8835737787901797, "grad_norm": 0.1583697348833084, "learning_rate": 1.6882836538326567e-05, "loss": 0.5097, "step": 3491 }, { "epoch": 0.8838268792710706, "grad_norm": 0.1492602527141571, "learning_rate": 1.6881099398586997e-05, "loss": 0.5606, "step": 3492 }, { "epoch": 0.8840799797519615, "grad_norm": 0.14807946979999542, "learning_rate": 1.68793618643686e-05, "loss": 0.5129, "step": 3493 }, { "epoch": 0.8843330802328524, "grad_norm": 0.15059691667556763, "learning_rate": 1.6877623935770977e-05, "loss": 0.5209, "step": 3494 }, { "epoch": 0.8845861807137434, "grad_norm": 0.14356523752212524, "learning_rate": 1.6875885612893763e-05, "loss": 0.5011, "step": 3495 }, { "epoch": 0.8848392811946343, "grad_norm": 0.14546121656894684, "learning_rate": 1.6874146895836615e-05, "loss": 0.5161, "step": 3496 }, { "epoch": 0.8850923816755252, "grad_norm": 0.14519689977169037, "learning_rate": 1.6872407784699204e-05, "loss": 0.5251, "step": 3497 }, { "epoch": 0.8853454821564161, "grad_norm": 0.14190933108329773, "learning_rate": 1.6870668279581232e-05, "loss": 0.5033, "step": 3498 }, { "epoch": 0.8855985826373071, "grad_norm": 0.14906252920627594, "learning_rate": 1.6868928380582424e-05, "loss": 0.5047, "step": 3499 }, { "epoch": 0.885851683118198, "grad_norm": 0.1466013342142105, "learning_rate": 1.686718808780252e-05, "loss": 0.5193, "step": 3500 }, { "epoch": 0.8861047835990888, "grad_norm": 0.15025852620601654, "learning_rate": 1.686544740134129e-05, "loss": 0.5506, "step": 3501 }, { "epoch": 0.8863578840799797, "grad_norm": 0.15151681005954742, "learning_rate": 1.686370632129853e-05, "loss": 0.5384, "step": 3502 }, { "epoch": 0.8866109845608706, "grad_norm": 0.1482160985469818, "learning_rate": 1.686196484777404e-05, "loss": 0.5193, "step": 3503 }, { "epoch": 0.8868640850417616, "grad_norm": 0.15177269279956818, "learning_rate": 1.686022298086766e-05, "loss": 0.5265, "step": 3504 }, { "epoch": 0.8871171855226525, "grad_norm": 0.1474560648202896, "learning_rate": 1.6858480720679257e-05, "loss": 0.5094, "step": 3505 }, { "epoch": 0.8873702860035434, "grad_norm": 0.1532500833272934, "learning_rate": 1.6856738067308695e-05, "loss": 0.4938, "step": 3506 }, { "epoch": 0.8876233864844343, "grad_norm": 0.1511625349521637, "learning_rate": 1.6854995020855886e-05, "loss": 0.5344, "step": 3507 }, { "epoch": 0.8878764869653253, "grad_norm": 0.15170307457447052, "learning_rate": 1.6853251581420755e-05, "loss": 0.5198, "step": 3508 }, { "epoch": 0.8881295874462162, "grad_norm": 0.1520860344171524, "learning_rate": 1.6851507749103245e-05, "loss": 0.5533, "step": 3509 }, { "epoch": 0.8883826879271071, "grad_norm": 0.14674068987369537, "learning_rate": 1.6849763524003334e-05, "loss": 0.5151, "step": 3510 }, { "epoch": 0.888635788407998, "grad_norm": 0.16098830103874207, "learning_rate": 1.684801890622101e-05, "loss": 0.5354, "step": 3511 }, { "epoch": 0.8888888888888888, "grad_norm": 0.14611618220806122, "learning_rate": 1.6846273895856287e-05, "loss": 0.5335, "step": 3512 }, { "epoch": 0.8891419893697798, "grad_norm": 0.1465662717819214, "learning_rate": 1.6844528493009202e-05, "loss": 0.5089, "step": 3513 }, { "epoch": 0.8893950898506707, "grad_norm": 0.14643220603466034, "learning_rate": 1.6842782697779818e-05, "loss": 0.5292, "step": 3514 }, { "epoch": 0.8896481903315616, "grad_norm": 0.20344223082065582, "learning_rate": 1.6841036510268218e-05, "loss": 0.5048, "step": 3515 }, { "epoch": 0.8899012908124525, "grad_norm": 0.15312816202640533, "learning_rate": 1.683928993057451e-05, "loss": 0.5291, "step": 3516 }, { "epoch": 0.8901543912933435, "grad_norm": 0.15690524876117706, "learning_rate": 1.683754295879881e-05, "loss": 0.518, "step": 3517 }, { "epoch": 0.8904074917742344, "grad_norm": 0.15592250227928162, "learning_rate": 1.6835795595041284e-05, "loss": 0.5612, "step": 3518 }, { "epoch": 0.8906605922551253, "grad_norm": 0.1496099978685379, "learning_rate": 1.6834047839402096e-05, "loss": 0.5312, "step": 3519 }, { "epoch": 0.8909136927360162, "grad_norm": 0.17859135568141937, "learning_rate": 1.683229969198144e-05, "loss": 0.5369, "step": 3520 }, { "epoch": 0.8911667932169071, "grad_norm": 0.1483176201581955, "learning_rate": 1.6830551152879534e-05, "loss": 0.5306, "step": 3521 }, { "epoch": 0.8914198936977981, "grad_norm": 0.1436195969581604, "learning_rate": 1.682880222219662e-05, "loss": 0.504, "step": 3522 }, { "epoch": 0.891672994178689, "grad_norm": 0.14228114485740662, "learning_rate": 1.6827052900032963e-05, "loss": 0.5106, "step": 3523 }, { "epoch": 0.8919260946595798, "grad_norm": 0.13983725011348724, "learning_rate": 1.6825303186488843e-05, "loss": 0.5211, "step": 3524 }, { "epoch": 0.8921791951404707, "grad_norm": 0.1424570530653, "learning_rate": 1.6823553081664568e-05, "loss": 0.5133, "step": 3525 }, { "epoch": 0.8924322956213617, "grad_norm": 0.14800359308719635, "learning_rate": 1.6821802585660475e-05, "loss": 0.5283, "step": 3526 }, { "epoch": 0.8926853961022526, "grad_norm": 0.14731517434120178, "learning_rate": 1.6820051698576906e-05, "loss": 0.5271, "step": 3527 }, { "epoch": 0.8929384965831435, "grad_norm": 0.14860732853412628, "learning_rate": 1.681830042051424e-05, "loss": 0.5287, "step": 3528 }, { "epoch": 0.8931915970640344, "grad_norm": 0.14400489628314972, "learning_rate": 1.6816548751572875e-05, "loss": 0.5087, "step": 3529 }, { "epoch": 0.8934446975449253, "grad_norm": 0.1470673680305481, "learning_rate": 1.681479669185323e-05, "loss": 0.5077, "step": 3530 }, { "epoch": 0.8936977980258163, "grad_norm": 0.14760112762451172, "learning_rate": 1.6813044241455747e-05, "loss": 0.514, "step": 3531 }, { "epoch": 0.8939508985067072, "grad_norm": 0.1499701291322708, "learning_rate": 1.6811291400480886e-05, "loss": 0.5066, "step": 3532 }, { "epoch": 0.8942039989875981, "grad_norm": 0.15296226739883423, "learning_rate": 1.680953816902914e-05, "loss": 0.5396, "step": 3533 }, { "epoch": 0.894457099468489, "grad_norm": 0.1517123132944107, "learning_rate": 1.680778454720102e-05, "loss": 0.5158, "step": 3534 }, { "epoch": 0.89471019994938, "grad_norm": 0.14845465123653412, "learning_rate": 1.6806030535097045e-05, "loss": 0.5176, "step": 3535 }, { "epoch": 0.8949633004302708, "grad_norm": 0.15333910286426544, "learning_rate": 1.6804276132817784e-05, "loss": 0.5703, "step": 3536 }, { "epoch": 0.8952164009111617, "grad_norm": 0.14943931996822357, "learning_rate": 1.68025213404638e-05, "loss": 0.5164, "step": 3537 }, { "epoch": 0.8954695013920526, "grad_norm": 0.14630047976970673, "learning_rate": 1.68007661581357e-05, "loss": 0.553, "step": 3538 }, { "epoch": 0.8957226018729436, "grad_norm": 0.1605490893125534, "learning_rate": 1.67990105859341e-05, "loss": 0.5257, "step": 3539 }, { "epoch": 0.8959757023538345, "grad_norm": 0.14859427511692047, "learning_rate": 1.6797254623959648e-05, "loss": 0.5092, "step": 3540 }, { "epoch": 0.8962288028347254, "grad_norm": 0.15891937911510468, "learning_rate": 1.6795498272313005e-05, "loss": 0.5291, "step": 3541 }, { "epoch": 0.8964819033156163, "grad_norm": 0.15164145827293396, "learning_rate": 1.6793741531094862e-05, "loss": 0.5138, "step": 3542 }, { "epoch": 0.8967350037965072, "grad_norm": 0.14699804782867432, "learning_rate": 1.679198440040593e-05, "loss": 0.534, "step": 3543 }, { "epoch": 0.8969881042773982, "grad_norm": 0.1932336390018463, "learning_rate": 1.6790226880346938e-05, "loss": 0.5186, "step": 3544 }, { "epoch": 0.8972412047582891, "grad_norm": 0.14946523308753967, "learning_rate": 1.6788468971018645e-05, "loss": 0.5245, "step": 3545 }, { "epoch": 0.89749430523918, "grad_norm": 0.1494015008211136, "learning_rate": 1.6786710672521823e-05, "loss": 0.5365, "step": 3546 }, { "epoch": 0.8977474057200708, "grad_norm": 0.14856594800949097, "learning_rate": 1.6784951984957272e-05, "loss": 0.5111, "step": 3547 }, { "epoch": 0.8980005062009618, "grad_norm": 0.1525156944990158, "learning_rate": 1.678319290842582e-05, "loss": 0.5072, "step": 3548 }, { "epoch": 0.8982536066818527, "grad_norm": 0.14739874005317688, "learning_rate": 1.6781433443028306e-05, "loss": 0.5156, "step": 3549 }, { "epoch": 0.8985067071627436, "grad_norm": 0.14433911442756653, "learning_rate": 1.67796735888656e-05, "loss": 0.5122, "step": 3550 }, { "epoch": 0.8987598076436345, "grad_norm": 0.1427396535873413, "learning_rate": 1.6777913346038586e-05, "loss": 0.5353, "step": 3551 }, { "epoch": 0.8990129081245254, "grad_norm": 0.14944280683994293, "learning_rate": 1.6776152714648178e-05, "loss": 0.5271, "step": 3552 }, { "epoch": 0.8992660086054164, "grad_norm": 0.15227444469928741, "learning_rate": 1.677439169479531e-05, "loss": 0.5553, "step": 3553 }, { "epoch": 0.8995191090863073, "grad_norm": 0.15288929641246796, "learning_rate": 1.6772630286580938e-05, "loss": 0.5201, "step": 3554 }, { "epoch": 0.8997722095671982, "grad_norm": 0.1477259397506714, "learning_rate": 1.6770868490106035e-05, "loss": 0.5427, "step": 3555 }, { "epoch": 0.9000253100480891, "grad_norm": 0.15653710067272186, "learning_rate": 1.6769106305471602e-05, "loss": 0.5349, "step": 3556 }, { "epoch": 0.90027841052898, "grad_norm": 0.14116887748241425, "learning_rate": 1.6767343732778667e-05, "loss": 0.5183, "step": 3557 }, { "epoch": 0.9005315110098709, "grad_norm": 0.1575784683227539, "learning_rate": 1.6765580772128268e-05, "loss": 0.5231, "step": 3558 }, { "epoch": 0.9007846114907618, "grad_norm": 0.15128043293952942, "learning_rate": 1.676381742362148e-05, "loss": 0.5116, "step": 3559 }, { "epoch": 0.9010377119716527, "grad_norm": 0.15102870762348175, "learning_rate": 1.6762053687359384e-05, "loss": 0.5229, "step": 3560 }, { "epoch": 0.9012908124525436, "grad_norm": 0.14146219193935394, "learning_rate": 1.6760289563443094e-05, "loss": 0.5177, "step": 3561 }, { "epoch": 0.9015439129334346, "grad_norm": 0.14700660109519958, "learning_rate": 1.6758525051973742e-05, "loss": 0.5387, "step": 3562 }, { "epoch": 0.9017970134143255, "grad_norm": 0.1503869742155075, "learning_rate": 1.6756760153052486e-05, "loss": 0.533, "step": 3563 }, { "epoch": 0.9020501138952164, "grad_norm": 0.1457030475139618, "learning_rate": 1.6754994866780502e-05, "loss": 0.514, "step": 3564 }, { "epoch": 0.9023032143761073, "grad_norm": 0.14772522449493408, "learning_rate": 1.6753229193258997e-05, "loss": 0.5597, "step": 3565 }, { "epoch": 0.9025563148569983, "grad_norm": 0.1503283530473709, "learning_rate": 1.6751463132589184e-05, "loss": 0.5361, "step": 3566 }, { "epoch": 0.9028094153378892, "grad_norm": 0.1497548520565033, "learning_rate": 1.674969668487231e-05, "loss": 0.5669, "step": 3567 }, { "epoch": 0.90306251581878, "grad_norm": 0.15100781619548798, "learning_rate": 1.6747929850209645e-05, "loss": 0.5151, "step": 3568 }, { "epoch": 0.9033156162996709, "grad_norm": 0.14710834622383118, "learning_rate": 1.6746162628702472e-05, "loss": 0.5, "step": 3569 }, { "epoch": 0.9035687167805618, "grad_norm": 0.1542469710111618, "learning_rate": 1.674439502045211e-05, "loss": 0.5578, "step": 3570 }, { "epoch": 0.9038218172614528, "grad_norm": 0.15553085505962372, "learning_rate": 1.674262702555988e-05, "loss": 0.5352, "step": 3571 }, { "epoch": 0.9040749177423437, "grad_norm": 0.14215399324893951, "learning_rate": 1.6740858644127153e-05, "loss": 0.5502, "step": 3572 }, { "epoch": 0.9043280182232346, "grad_norm": 0.14907367527484894, "learning_rate": 1.67390898762553e-05, "loss": 0.5246, "step": 3573 }, { "epoch": 0.9045811187041255, "grad_norm": 0.14666853845119476, "learning_rate": 1.6737320722045715e-05, "loss": 0.5213, "step": 3574 }, { "epoch": 0.9048342191850165, "grad_norm": 0.15853703022003174, "learning_rate": 1.6735551181599827e-05, "loss": 0.5466, "step": 3575 }, { "epoch": 0.9050873196659074, "grad_norm": 0.14738543331623077, "learning_rate": 1.6733781255019076e-05, "loss": 0.5472, "step": 3576 }, { "epoch": 0.9053404201467983, "grad_norm": 0.15210974216461182, "learning_rate": 1.673201094240493e-05, "loss": 0.5388, "step": 3577 }, { "epoch": 0.9055935206276892, "grad_norm": 0.14926952123641968, "learning_rate": 1.673024024385888e-05, "loss": 0.5079, "step": 3578 }, { "epoch": 0.90584662110858, "grad_norm": 0.14893130958080292, "learning_rate": 1.672846915948243e-05, "loss": 0.5282, "step": 3579 }, { "epoch": 0.906099721589471, "grad_norm": 0.14674939215183258, "learning_rate": 1.6726697689377112e-05, "loss": 0.4976, "step": 3580 }, { "epoch": 0.9063528220703619, "grad_norm": 0.1544337272644043, "learning_rate": 1.6724925833644495e-05, "loss": 0.5224, "step": 3581 }, { "epoch": 0.9066059225512528, "grad_norm": 0.1453384906053543, "learning_rate": 1.6723153592386137e-05, "loss": 0.511, "step": 3582 }, { "epoch": 0.9068590230321437, "grad_norm": 0.18287062644958496, "learning_rate": 1.6721380965703646e-05, "loss": 0.5394, "step": 3583 }, { "epoch": 0.9071121235130347, "grad_norm": 0.1470341831445694, "learning_rate": 1.671960795369864e-05, "loss": 0.5319, "step": 3584 }, { "epoch": 0.9073652239939256, "grad_norm": 0.14185889065265656, "learning_rate": 1.671783455647277e-05, "loss": 0.5097, "step": 3585 }, { "epoch": 0.9076183244748165, "grad_norm": 0.1518610417842865, "learning_rate": 1.671606077412769e-05, "loss": 0.5246, "step": 3586 }, { "epoch": 0.9078714249557074, "grad_norm": 0.16382010281085968, "learning_rate": 1.6714286606765098e-05, "loss": 0.5506, "step": 3587 }, { "epoch": 0.9081245254365984, "grad_norm": 0.14890503883361816, "learning_rate": 1.671251205448669e-05, "loss": 0.5394, "step": 3588 }, { "epoch": 0.9083776259174893, "grad_norm": 0.15002837777137756, "learning_rate": 1.671073711739421e-05, "loss": 0.5452, "step": 3589 }, { "epoch": 0.9086307263983802, "grad_norm": 0.14891470968723297, "learning_rate": 1.6708961795589406e-05, "loss": 0.533, "step": 3590 }, { "epoch": 0.908883826879271, "grad_norm": 0.1472836583852768, "learning_rate": 1.670718608917405e-05, "loss": 0.5244, "step": 3591 }, { "epoch": 0.9091369273601619, "grad_norm": 0.14605462551116943, "learning_rate": 1.6705409998249947e-05, "loss": 0.5079, "step": 3592 }, { "epoch": 0.9093900278410529, "grad_norm": 0.14506979286670685, "learning_rate": 1.6703633522918908e-05, "loss": 0.5274, "step": 3593 }, { "epoch": 0.9096431283219438, "grad_norm": 0.148104727268219, "learning_rate": 1.6701856663282786e-05, "loss": 0.5499, "step": 3594 }, { "epoch": 0.9098962288028347, "grad_norm": 0.16289789974689484, "learning_rate": 1.670007941944343e-05, "loss": 0.5337, "step": 3595 }, { "epoch": 0.9101493292837256, "grad_norm": 0.15084603428840637, "learning_rate": 1.669830179150274e-05, "loss": 0.5294, "step": 3596 }, { "epoch": 0.9104024297646166, "grad_norm": 0.14832569658756256, "learning_rate": 1.6696523779562614e-05, "loss": 0.5128, "step": 3597 }, { "epoch": 0.9106555302455075, "grad_norm": 0.14730194211006165, "learning_rate": 1.6694745383724984e-05, "loss": 0.5169, "step": 3598 }, { "epoch": 0.9109086307263984, "grad_norm": 0.14623935520648956, "learning_rate": 1.6692966604091804e-05, "loss": 0.5247, "step": 3599 }, { "epoch": 0.9111617312072893, "grad_norm": 0.5078619122505188, "learning_rate": 1.6691187440765044e-05, "loss": 0.5421, "step": 3600 }, { "epoch": 0.9114148316881802, "grad_norm": 0.15417684614658356, "learning_rate": 1.6689407893846702e-05, "loss": 0.5199, "step": 3601 }, { "epoch": 0.9116679321690712, "grad_norm": 0.14338022470474243, "learning_rate": 1.6687627963438798e-05, "loss": 0.5228, "step": 3602 }, { "epoch": 0.911921032649962, "grad_norm": 0.14711850881576538, "learning_rate": 1.6685847649643368e-05, "loss": 0.513, "step": 3603 }, { "epoch": 0.9121741331308529, "grad_norm": 0.1450110822916031, "learning_rate": 1.6684066952562474e-05, "loss": 0.4989, "step": 3604 }, { "epoch": 0.9124272336117438, "grad_norm": 0.14669804275035858, "learning_rate": 1.6682285872298195e-05, "loss": 0.504, "step": 3605 }, { "epoch": 0.9126803340926348, "grad_norm": 0.15428633987903595, "learning_rate": 1.668050440895265e-05, "loss": 0.5345, "step": 3606 }, { "epoch": 0.9129334345735257, "grad_norm": 0.17304518818855286, "learning_rate": 1.6678722562627954e-05, "loss": 0.5385, "step": 3607 }, { "epoch": 0.9131865350544166, "grad_norm": 0.1551153063774109, "learning_rate": 1.6676940333426262e-05, "loss": 0.5421, "step": 3608 }, { "epoch": 0.9134396355353075, "grad_norm": 0.15000928938388824, "learning_rate": 1.6675157721449743e-05, "loss": 0.5319, "step": 3609 }, { "epoch": 0.9136927360161984, "grad_norm": 0.1465054601430893, "learning_rate": 1.6673374726800592e-05, "loss": 0.5426, "step": 3610 }, { "epoch": 0.9139458364970894, "grad_norm": 0.16044995188713074, "learning_rate": 1.6671591349581024e-05, "loss": 0.5261, "step": 3611 }, { "epoch": 0.9141989369779803, "grad_norm": 0.1494501680135727, "learning_rate": 1.6669807589893277e-05, "loss": 0.5438, "step": 3612 }, { "epoch": 0.9144520374588712, "grad_norm": 0.1447419673204422, "learning_rate": 1.6668023447839607e-05, "loss": 0.5166, "step": 3613 }, { "epoch": 0.914705137939762, "grad_norm": 0.14979945123195648, "learning_rate": 1.6666238923522296e-05, "loss": 0.5397, "step": 3614 }, { "epoch": 0.914958238420653, "grad_norm": 0.14727343618869781, "learning_rate": 1.666445401704365e-05, "loss": 0.5271, "step": 3615 }, { "epoch": 0.9152113389015439, "grad_norm": 0.14938634634017944, "learning_rate": 1.6662668728505992e-05, "loss": 0.5385, "step": 3616 }, { "epoch": 0.9154644393824348, "grad_norm": 0.21567074954509735, "learning_rate": 1.6660883058011672e-05, "loss": 0.5465, "step": 3617 }, { "epoch": 0.9157175398633257, "grad_norm": 0.15296199917793274, "learning_rate": 1.6659097005663052e-05, "loss": 0.5444, "step": 3618 }, { "epoch": 0.9159706403442166, "grad_norm": 0.1634710282087326, "learning_rate": 1.665731057156253e-05, "loss": 0.5251, "step": 3619 }, { "epoch": 0.9162237408251076, "grad_norm": 0.14891543984413147, "learning_rate": 1.665552375581251e-05, "loss": 0.5243, "step": 3620 }, { "epoch": 0.9164768413059985, "grad_norm": 0.1558849960565567, "learning_rate": 1.665373655851543e-05, "loss": 0.5047, "step": 3621 }, { "epoch": 0.9167299417868894, "grad_norm": 0.14574921131134033, "learning_rate": 1.6651948979773754e-05, "loss": 0.5377, "step": 3622 }, { "epoch": 0.9169830422677803, "grad_norm": 0.14607584476470947, "learning_rate": 1.665016101968995e-05, "loss": 0.5197, "step": 3623 }, { "epoch": 0.9172361427486713, "grad_norm": 0.15331031382083893, "learning_rate": 1.664837267836652e-05, "loss": 0.5147, "step": 3624 }, { "epoch": 0.9174892432295622, "grad_norm": 0.14590170979499817, "learning_rate": 1.664658395590599e-05, "loss": 0.5443, "step": 3625 }, { "epoch": 0.917742343710453, "grad_norm": 0.14982114732265472, "learning_rate": 1.6644794852410896e-05, "loss": 0.5336, "step": 3626 }, { "epoch": 0.9179954441913439, "grad_norm": 0.1437799483537674, "learning_rate": 1.6643005367983815e-05, "loss": 0.5172, "step": 3627 }, { "epoch": 0.9182485446722348, "grad_norm": 0.1481659859418869, "learning_rate": 1.6641215502727322e-05, "loss": 0.51, "step": 3628 }, { "epoch": 0.9185016451531258, "grad_norm": 0.14843769371509552, "learning_rate": 1.663942525674403e-05, "loss": 0.5126, "step": 3629 }, { "epoch": 0.9187547456340167, "grad_norm": 0.14817391335964203, "learning_rate": 1.663763463013658e-05, "loss": 0.493, "step": 3630 }, { "epoch": 0.9190078461149076, "grad_norm": 0.1455070525407791, "learning_rate": 1.6635843623007612e-05, "loss": 0.5583, "step": 3631 }, { "epoch": 0.9192609465957985, "grad_norm": 0.14654161036014557, "learning_rate": 1.6634052235459807e-05, "loss": 0.5174, "step": 3632 }, { "epoch": 0.9195140470766895, "grad_norm": 0.1489616334438324, "learning_rate": 1.6632260467595855e-05, "loss": 0.5653, "step": 3633 }, { "epoch": 0.9197671475575804, "grad_norm": 0.14681079983711243, "learning_rate": 1.6630468319518485e-05, "loss": 0.5171, "step": 3634 }, { "epoch": 0.9200202480384713, "grad_norm": 0.14830783009529114, "learning_rate": 1.6628675791330428e-05, "loss": 0.5485, "step": 3635 }, { "epoch": 0.9202733485193622, "grad_norm": 0.15489326417446136, "learning_rate": 1.662688288313445e-05, "loss": 0.517, "step": 3636 }, { "epoch": 0.9205264490002532, "grad_norm": 0.1531219482421875, "learning_rate": 1.662508959503333e-05, "loss": 0.5279, "step": 3637 }, { "epoch": 0.920779549481144, "grad_norm": 0.1536158174276352, "learning_rate": 1.6623295927129884e-05, "loss": 0.553, "step": 3638 }, { "epoch": 0.9210326499620349, "grad_norm": 0.1454981416463852, "learning_rate": 1.6621501879526926e-05, "loss": 0.5416, "step": 3639 }, { "epoch": 0.9212857504429258, "grad_norm": 0.14393465220928192, "learning_rate": 1.6619707452327315e-05, "loss": 0.4994, "step": 3640 }, { "epoch": 0.9215388509238167, "grad_norm": 0.14966842532157898, "learning_rate": 1.6617912645633915e-05, "loss": 0.5153, "step": 3641 }, { "epoch": 0.9217919514047077, "grad_norm": 0.14466114342212677, "learning_rate": 1.6616117459549626e-05, "loss": 0.5079, "step": 3642 }, { "epoch": 0.9220450518855986, "grad_norm": 0.15230457484722137, "learning_rate": 1.661432189417735e-05, "loss": 0.5477, "step": 3643 }, { "epoch": 0.9222981523664895, "grad_norm": 0.15053333342075348, "learning_rate": 1.6612525949620034e-05, "loss": 0.5474, "step": 3644 }, { "epoch": 0.9225512528473804, "grad_norm": 0.1569037139415741, "learning_rate": 1.6610729625980634e-05, "loss": 0.5449, "step": 3645 }, { "epoch": 0.9228043533282714, "grad_norm": 0.15597319602966309, "learning_rate": 1.6608932923362126e-05, "loss": 0.5321, "step": 3646 }, { "epoch": 0.9230574538091623, "grad_norm": 0.14706282317638397, "learning_rate": 1.660713584186751e-05, "loss": 0.5087, "step": 3647 }, { "epoch": 0.9233105542900532, "grad_norm": 0.14015620946884155, "learning_rate": 1.6605338381599817e-05, "loss": 0.5065, "step": 3648 }, { "epoch": 0.923563654770944, "grad_norm": 0.14668244123458862, "learning_rate": 1.660354054266208e-05, "loss": 0.5135, "step": 3649 }, { "epoch": 0.9238167552518349, "grad_norm": 0.14619354903697968, "learning_rate": 1.6601742325157374e-05, "loss": 0.5209, "step": 3650 }, { "epoch": 0.9240698557327259, "grad_norm": 0.1468517780303955, "learning_rate": 1.6599943729188788e-05, "loss": 0.5401, "step": 3651 }, { "epoch": 0.9243229562136168, "grad_norm": 0.1478777825832367, "learning_rate": 1.659814475485942e-05, "loss": 0.5466, "step": 3652 }, { "epoch": 0.9245760566945077, "grad_norm": 0.15836574137210846, "learning_rate": 1.659634540227242e-05, "loss": 0.5302, "step": 3653 }, { "epoch": 0.9248291571753986, "grad_norm": 0.1507386863231659, "learning_rate": 1.6594545671530924e-05, "loss": 0.5455, "step": 3654 }, { "epoch": 0.9250822576562896, "grad_norm": 0.14204141497612, "learning_rate": 1.6592745562738113e-05, "loss": 0.5231, "step": 3655 }, { "epoch": 0.9253353581371805, "grad_norm": 0.15151117742061615, "learning_rate": 1.6590945075997186e-05, "loss": 0.5254, "step": 3656 }, { "epoch": 0.9255884586180714, "grad_norm": 0.14500434696674347, "learning_rate": 1.6589144211411357e-05, "loss": 0.5194, "step": 3657 }, { "epoch": 0.9258415590989623, "grad_norm": 0.15036217868328094, "learning_rate": 1.6587342969083867e-05, "loss": 0.5352, "step": 3658 }, { "epoch": 0.9260946595798532, "grad_norm": 0.14913876354694366, "learning_rate": 1.658554134911798e-05, "loss": 0.5438, "step": 3659 }, { "epoch": 0.9263477600607442, "grad_norm": 0.1413276195526123, "learning_rate": 1.6583739351616975e-05, "loss": 0.5293, "step": 3660 }, { "epoch": 0.926600860541635, "grad_norm": 0.15294210612773895, "learning_rate": 1.658193697668416e-05, "loss": 0.5114, "step": 3661 }, { "epoch": 0.9268539610225259, "grad_norm": 0.15567360818386078, "learning_rate": 1.658013422442286e-05, "loss": 0.5246, "step": 3662 }, { "epoch": 0.9271070615034168, "grad_norm": 0.14649812877178192, "learning_rate": 1.6578331094936423e-05, "loss": 0.5083, "step": 3663 }, { "epoch": 0.9273601619843078, "grad_norm": 0.14088179171085358, "learning_rate": 1.657652758832822e-05, "loss": 0.5252, "step": 3664 }, { "epoch": 0.9276132624651987, "grad_norm": 0.14526082575321198, "learning_rate": 1.657472370470164e-05, "loss": 0.5262, "step": 3665 }, { "epoch": 0.9278663629460896, "grad_norm": 0.14780530333518982, "learning_rate": 1.6572919444160093e-05, "loss": 0.5149, "step": 3666 }, { "epoch": 0.9281194634269805, "grad_norm": 0.15143126249313354, "learning_rate": 1.657111480680702e-05, "loss": 0.5299, "step": 3667 }, { "epoch": 0.9283725639078714, "grad_norm": 0.14445963501930237, "learning_rate": 1.6569309792745873e-05, "loss": 0.5063, "step": 3668 }, { "epoch": 0.9286256643887624, "grad_norm": 0.14489617943763733, "learning_rate": 1.6567504402080134e-05, "loss": 0.5001, "step": 3669 }, { "epoch": 0.9288787648696533, "grad_norm": 0.15362706780433655, "learning_rate": 1.6565698634913296e-05, "loss": 0.517, "step": 3670 }, { "epoch": 0.9291318653505442, "grad_norm": 0.15799693763256073, "learning_rate": 1.6563892491348882e-05, "loss": 0.5247, "step": 3671 }, { "epoch": 0.929384965831435, "grad_norm": 0.1488887518644333, "learning_rate": 1.6562085971490437e-05, "loss": 0.5233, "step": 3672 }, { "epoch": 0.929638066312326, "grad_norm": 0.16144323348999023, "learning_rate": 1.6560279075441522e-05, "loss": 0.5007, "step": 3673 }, { "epoch": 0.9298911667932169, "grad_norm": 0.14974623918533325, "learning_rate": 1.6558471803305723e-05, "loss": 0.539, "step": 3674 }, { "epoch": 0.9301442672741078, "grad_norm": 0.1455133706331253, "learning_rate": 1.655666415518665e-05, "loss": 0.5211, "step": 3675 }, { "epoch": 0.9303973677549987, "grad_norm": 0.15050366520881653, "learning_rate": 1.655485613118793e-05, "loss": 0.5491, "step": 3676 }, { "epoch": 0.9306504682358896, "grad_norm": 0.15126065909862518, "learning_rate": 1.6553047731413208e-05, "loss": 0.5299, "step": 3677 }, { "epoch": 0.9309035687167806, "grad_norm": 0.14508146047592163, "learning_rate": 1.6551238955966164e-05, "loss": 0.5337, "step": 3678 }, { "epoch": 0.9311566691976715, "grad_norm": 0.14636819064617157, "learning_rate": 1.6549429804950484e-05, "loss": 0.5469, "step": 3679 }, { "epoch": 0.9314097696785624, "grad_norm": 0.14605185389518738, "learning_rate": 1.6547620278469886e-05, "loss": 0.5174, "step": 3680 }, { "epoch": 0.9316628701594533, "grad_norm": 0.15410538017749786, "learning_rate": 1.6545810376628112e-05, "loss": 0.5187, "step": 3681 }, { "epoch": 0.9319159706403443, "grad_norm": 0.14263054728507996, "learning_rate": 1.654400009952891e-05, "loss": 0.5093, "step": 3682 }, { "epoch": 0.9321690711212351, "grad_norm": 0.15540753304958344, "learning_rate": 1.6542189447276062e-05, "loss": 0.5138, "step": 3683 }, { "epoch": 0.932422171602126, "grad_norm": 0.14605596661567688, "learning_rate": 1.6540378419973374e-05, "loss": 0.503, "step": 3684 }, { "epoch": 0.9326752720830169, "grad_norm": 0.14798161387443542, "learning_rate": 1.6538567017724663e-05, "loss": 0.528, "step": 3685 }, { "epoch": 0.9329283725639079, "grad_norm": 0.1593511402606964, "learning_rate": 1.6536755240633777e-05, "loss": 0.5304, "step": 3686 }, { "epoch": 0.9331814730447988, "grad_norm": 0.15393568575382233, "learning_rate": 1.653494308880458e-05, "loss": 0.5125, "step": 3687 }, { "epoch": 0.9334345735256897, "grad_norm": 0.14946870505809784, "learning_rate": 1.6533130562340957e-05, "loss": 0.5391, "step": 3688 }, { "epoch": 0.9336876740065806, "grad_norm": 0.14910483360290527, "learning_rate": 1.6531317661346813e-05, "loss": 0.5577, "step": 3689 }, { "epoch": 0.9339407744874715, "grad_norm": 0.14952532947063446, "learning_rate": 1.6529504385926086e-05, "loss": 0.5466, "step": 3690 }, { "epoch": 0.9341938749683625, "grad_norm": 0.14770744740962982, "learning_rate": 1.652769073618272e-05, "loss": 0.5334, "step": 3691 }, { "epoch": 0.9344469754492534, "grad_norm": 0.14591385424137115, "learning_rate": 1.6525876712220696e-05, "loss": 0.5506, "step": 3692 }, { "epoch": 0.9347000759301443, "grad_norm": 0.1432468444108963, "learning_rate": 1.6524062314143997e-05, "loss": 0.4886, "step": 3693 }, { "epoch": 0.9349531764110351, "grad_norm": 0.1527366042137146, "learning_rate": 1.6522247542056652e-05, "loss": 0.5242, "step": 3694 }, { "epoch": 0.9352062768919261, "grad_norm": 0.1392030417919159, "learning_rate": 1.6520432396062685e-05, "loss": 0.4892, "step": 3695 }, { "epoch": 0.935459377372817, "grad_norm": 0.1481018364429474, "learning_rate": 1.651861687626616e-05, "loss": 0.5395, "step": 3696 }, { "epoch": 0.9357124778537079, "grad_norm": 0.14707832038402557, "learning_rate": 1.651680098277116e-05, "loss": 0.5346, "step": 3697 }, { "epoch": 0.9359655783345988, "grad_norm": 0.14899218082427979, "learning_rate": 1.6514984715681783e-05, "loss": 0.5683, "step": 3698 }, { "epoch": 0.9362186788154897, "grad_norm": 0.1743021011352539, "learning_rate": 1.651316807510215e-05, "loss": 0.5254, "step": 3699 }, { "epoch": 0.9364717792963807, "grad_norm": 0.14781922101974487, "learning_rate": 1.651135106113641e-05, "loss": 0.564, "step": 3700 }, { "epoch": 0.9367248797772716, "grad_norm": 0.16526828706264496, "learning_rate": 1.6509533673888722e-05, "loss": 0.5316, "step": 3701 }, { "epoch": 0.9369779802581625, "grad_norm": 0.1428469568490982, "learning_rate": 1.650771591346328e-05, "loss": 0.5436, "step": 3702 }, { "epoch": 0.9372310807390534, "grad_norm": 0.14892153441905975, "learning_rate": 1.6505897779964288e-05, "loss": 0.5438, "step": 3703 }, { "epoch": 0.9374841812199444, "grad_norm": 0.14730869233608246, "learning_rate": 1.6504079273495977e-05, "loss": 0.5255, "step": 3704 }, { "epoch": 0.9377372817008353, "grad_norm": 0.15142153203487396, "learning_rate": 1.6502260394162598e-05, "loss": 0.4979, "step": 3705 }, { "epoch": 0.9379903821817261, "grad_norm": 0.14910633862018585, "learning_rate": 1.6500441142068426e-05, "loss": 0.5294, "step": 3706 }, { "epoch": 0.938243482662617, "grad_norm": 0.14492537081241608, "learning_rate": 1.649862151731775e-05, "loss": 0.5155, "step": 3707 }, { "epoch": 0.9384965831435079, "grad_norm": 0.16272975504398346, "learning_rate": 1.6496801520014886e-05, "loss": 0.5372, "step": 3708 }, { "epoch": 0.9387496836243989, "grad_norm": 0.1430039256811142, "learning_rate": 1.6494981150264172e-05, "loss": 0.539, "step": 3709 }, { "epoch": 0.9390027841052898, "grad_norm": 0.15006831288337708, "learning_rate": 1.6493160408169972e-05, "loss": 0.5207, "step": 3710 }, { "epoch": 0.9392558845861807, "grad_norm": 0.1520131528377533, "learning_rate": 1.6491339293836654e-05, "loss": 0.5295, "step": 3711 }, { "epoch": 0.9395089850670716, "grad_norm": 0.15275251865386963, "learning_rate": 1.6489517807368626e-05, "loss": 0.533, "step": 3712 }, { "epoch": 0.9397620855479626, "grad_norm": 0.1537601202726364, "learning_rate": 1.6487695948870307e-05, "loss": 0.548, "step": 3713 }, { "epoch": 0.9400151860288535, "grad_norm": 0.14867492020130157, "learning_rate": 1.648587371844614e-05, "loss": 0.522, "step": 3714 }, { "epoch": 0.9402682865097444, "grad_norm": 0.1484834849834442, "learning_rate": 1.6484051116200594e-05, "loss": 0.5443, "step": 3715 }, { "epoch": 0.9405213869906353, "grad_norm": 0.14887021481990814, "learning_rate": 1.648222814223815e-05, "loss": 0.5181, "step": 3716 }, { "epoch": 0.9407744874715261, "grad_norm": 0.14832769334316254, "learning_rate": 1.6480404796663316e-05, "loss": 0.5076, "step": 3717 }, { "epoch": 0.9410275879524171, "grad_norm": 0.155690997838974, "learning_rate": 1.647858107958062e-05, "loss": 0.5299, "step": 3718 }, { "epoch": 0.941280688433308, "grad_norm": 0.14807718992233276, "learning_rate": 1.6476756991094614e-05, "loss": 0.5245, "step": 3719 }, { "epoch": 0.9415337889141989, "grad_norm": 0.1461859941482544, "learning_rate": 1.6474932531309874e-05, "loss": 0.5091, "step": 3720 }, { "epoch": 0.9417868893950898, "grad_norm": 0.14575421810150146, "learning_rate": 1.6473107700330983e-05, "loss": 0.5237, "step": 3721 }, { "epoch": 0.9420399898759808, "grad_norm": 0.14872266352176666, "learning_rate": 1.6471282498262557e-05, "loss": 0.545, "step": 3722 }, { "epoch": 0.9422930903568717, "grad_norm": 0.15600840747356415, "learning_rate": 1.6469456925209235e-05, "loss": 0.5221, "step": 3723 }, { "epoch": 0.9425461908377626, "grad_norm": 0.15039996802806854, "learning_rate": 1.6467630981275672e-05, "loss": 0.5293, "step": 3724 }, { "epoch": 0.9427992913186535, "grad_norm": 0.1508973389863968, "learning_rate": 1.6465804666566542e-05, "loss": 0.541, "step": 3725 }, { "epoch": 0.9430523917995444, "grad_norm": 0.14584723114967346, "learning_rate": 1.6463977981186545e-05, "loss": 0.5288, "step": 3726 }, { "epoch": 0.9433054922804354, "grad_norm": 0.14509539306163788, "learning_rate": 1.6462150925240403e-05, "loss": 0.5199, "step": 3727 }, { "epoch": 0.9435585927613263, "grad_norm": 0.15895618498325348, "learning_rate": 1.6460323498832856e-05, "loss": 0.5303, "step": 3728 }, { "epoch": 0.9438116932422171, "grad_norm": 0.15297171473503113, "learning_rate": 1.6458495702068667e-05, "loss": 0.5213, "step": 3729 }, { "epoch": 0.944064793723108, "grad_norm": 0.15110565721988678, "learning_rate": 1.645666753505262e-05, "loss": 0.5212, "step": 3730 }, { "epoch": 0.944317894203999, "grad_norm": 0.16017645597457886, "learning_rate": 1.645483899788952e-05, "loss": 0.5363, "step": 3731 }, { "epoch": 0.9445709946848899, "grad_norm": 0.21678687632083893, "learning_rate": 1.645301009068419e-05, "loss": 0.4942, "step": 3732 }, { "epoch": 0.9448240951657808, "grad_norm": 0.20207969844341278, "learning_rate": 1.6451180813541483e-05, "loss": 0.5625, "step": 3733 }, { "epoch": 0.9450771956466717, "grad_norm": 0.16444139182567596, "learning_rate": 1.6449351166566262e-05, "loss": 0.5143, "step": 3734 }, { "epoch": 0.9453302961275627, "grad_norm": 0.14706484973430634, "learning_rate": 1.644752114986342e-05, "loss": 0.5175, "step": 3735 }, { "epoch": 0.9455833966084536, "grad_norm": 0.14995858073234558, "learning_rate": 1.6445690763537867e-05, "loss": 0.5114, "step": 3736 }, { "epoch": 0.9458364970893445, "grad_norm": 0.16415844857692719, "learning_rate": 1.644386000769454e-05, "loss": 0.5115, "step": 3737 }, { "epoch": 0.9460895975702354, "grad_norm": 0.14436942338943481, "learning_rate": 1.6442028882438382e-05, "loss": 0.4989, "step": 3738 }, { "epoch": 0.9463426980511263, "grad_norm": 0.15018820762634277, "learning_rate": 1.6440197387874378e-05, "loss": 0.5292, "step": 3739 }, { "epoch": 0.9465957985320173, "grad_norm": 0.14948798716068268, "learning_rate": 1.6438365524107514e-05, "loss": 0.5559, "step": 3740 }, { "epoch": 0.9468488990129081, "grad_norm": 0.156710684299469, "learning_rate": 1.6436533291242814e-05, "loss": 0.527, "step": 3741 }, { "epoch": 0.947101999493799, "grad_norm": 0.1518458127975464, "learning_rate": 1.6434700689385313e-05, "loss": 0.545, "step": 3742 }, { "epoch": 0.9473550999746899, "grad_norm": 0.15717007219791412, "learning_rate": 1.6432867718640072e-05, "loss": 0.5381, "step": 3743 }, { "epoch": 0.9476082004555809, "grad_norm": 0.14824333786964417, "learning_rate": 1.6431034379112166e-05, "loss": 0.5292, "step": 3744 }, { "epoch": 0.9478613009364718, "grad_norm": 0.14966285228729248, "learning_rate": 1.6429200670906705e-05, "loss": 0.5353, "step": 3745 }, { "epoch": 0.9481144014173627, "grad_norm": 0.14533835649490356, "learning_rate": 1.642736659412881e-05, "loss": 0.5127, "step": 3746 }, { "epoch": 0.9483675018982536, "grad_norm": 0.14732496440410614, "learning_rate": 1.642553214888362e-05, "loss": 0.5474, "step": 3747 }, { "epoch": 0.9486206023791445, "grad_norm": 0.15061214566230774, "learning_rate": 1.64236973352763e-05, "loss": 0.519, "step": 3748 }, { "epoch": 0.9488737028600355, "grad_norm": 0.1512715071439743, "learning_rate": 1.642186215341204e-05, "loss": 0.5354, "step": 3749 }, { "epoch": 0.9491268033409264, "grad_norm": 0.1489042043685913, "learning_rate": 1.6420026603396046e-05, "loss": 0.5044, "step": 3750 }, { "epoch": 0.9493799038218173, "grad_norm": 0.14633409678936005, "learning_rate": 1.6418190685333545e-05, "loss": 0.537, "step": 3751 }, { "epoch": 0.9496330043027081, "grad_norm": 0.15067242085933685, "learning_rate": 1.6416354399329785e-05, "loss": 0.5315, "step": 3752 }, { "epoch": 0.9498861047835991, "grad_norm": 0.1458524614572525, "learning_rate": 1.6414517745490038e-05, "loss": 0.5463, "step": 3753 }, { "epoch": 0.95013920526449, "grad_norm": 0.15035071969032288, "learning_rate": 1.6412680723919593e-05, "loss": 0.5375, "step": 3754 }, { "epoch": 0.9503923057453809, "grad_norm": 0.14916814863681793, "learning_rate": 1.6410843334723768e-05, "loss": 0.515, "step": 3755 }, { "epoch": 0.9506454062262718, "grad_norm": 0.1461304873228073, "learning_rate": 1.6409005578007896e-05, "loss": 0.5117, "step": 3756 }, { "epoch": 0.9508985067071627, "grad_norm": 0.15287987887859344, "learning_rate": 1.6407167453877323e-05, "loss": 0.5396, "step": 3757 }, { "epoch": 0.9511516071880537, "grad_norm": 0.15371842682361603, "learning_rate": 1.6405328962437436e-05, "loss": 0.5179, "step": 3758 }, { "epoch": 0.9514047076689446, "grad_norm": 0.14629611372947693, "learning_rate": 1.6403490103793627e-05, "loss": 0.5133, "step": 3759 }, { "epoch": 0.9516578081498355, "grad_norm": 0.16815464198589325, "learning_rate": 1.6401650878051313e-05, "loss": 0.5232, "step": 3760 }, { "epoch": 0.9519109086307264, "grad_norm": 0.14734789729118347, "learning_rate": 1.639981128531593e-05, "loss": 0.525, "step": 3761 }, { "epoch": 0.9521640091116174, "grad_norm": 0.16419310867786407, "learning_rate": 1.6397971325692945e-05, "loss": 0.5383, "step": 3762 }, { "epoch": 0.9524171095925082, "grad_norm": 0.14848332107067108, "learning_rate": 1.6396130999287834e-05, "loss": 0.5169, "step": 3763 }, { "epoch": 0.9526702100733991, "grad_norm": 0.14701737463474274, "learning_rate": 1.6394290306206103e-05, "loss": 0.5309, "step": 3764 }, { "epoch": 0.95292331055429, "grad_norm": 0.14737538993358612, "learning_rate": 1.6392449246553273e-05, "loss": 0.5259, "step": 3765 }, { "epoch": 0.9531764110351809, "grad_norm": 0.15066717565059662, "learning_rate": 1.6390607820434884e-05, "loss": 0.5376, "step": 3766 }, { "epoch": 0.9534295115160719, "grad_norm": 0.15112866461277008, "learning_rate": 1.6388766027956507e-05, "loss": 0.5348, "step": 3767 }, { "epoch": 0.9536826119969628, "grad_norm": 0.17869140207767487, "learning_rate": 1.6386923869223723e-05, "loss": 0.5409, "step": 3768 }, { "epoch": 0.9539357124778537, "grad_norm": 0.1470973640680313, "learning_rate": 1.6385081344342144e-05, "loss": 0.5224, "step": 3769 }, { "epoch": 0.9541888129587446, "grad_norm": 0.14399096369743347, "learning_rate": 1.6383238453417397e-05, "loss": 0.503, "step": 3770 }, { "epoch": 0.9544419134396356, "grad_norm": 0.14881475269794464, "learning_rate": 1.638139519655513e-05, "loss": 0.5471, "step": 3771 }, { "epoch": 0.9546950139205265, "grad_norm": 0.15086811780929565, "learning_rate": 1.637955157386101e-05, "loss": 0.5539, "step": 3772 }, { "epoch": 0.9549481144014174, "grad_norm": 0.1603875756263733, "learning_rate": 1.6377707585440733e-05, "loss": 0.5547, "step": 3773 }, { "epoch": 0.9552012148823082, "grad_norm": 0.3683100640773773, "learning_rate": 1.6375863231400005e-05, "loss": 0.5051, "step": 3774 }, { "epoch": 0.9554543153631991, "grad_norm": 0.14303649961948395, "learning_rate": 1.637401851184457e-05, "loss": 0.5323, "step": 3775 }, { "epoch": 0.9557074158440901, "grad_norm": 0.14671659469604492, "learning_rate": 1.637217342688017e-05, "loss": 0.533, "step": 3776 }, { "epoch": 0.955960516324981, "grad_norm": 0.15702015161514282, "learning_rate": 1.637032797661258e-05, "loss": 0.5313, "step": 3777 }, { "epoch": 0.9562136168058719, "grad_norm": 0.14805901050567627, "learning_rate": 1.636848216114761e-05, "loss": 0.5072, "step": 3778 }, { "epoch": 0.9564667172867628, "grad_norm": 0.1447458118200302, "learning_rate": 1.636663598059106e-05, "loss": 0.5454, "step": 3779 }, { "epoch": 0.9567198177676538, "grad_norm": 0.1459626704454422, "learning_rate": 1.6364789435048772e-05, "loss": 0.5377, "step": 3780 }, { "epoch": 0.9569729182485447, "grad_norm": 0.1516941636800766, "learning_rate": 1.6362942524626614e-05, "loss": 0.5233, "step": 3781 }, { "epoch": 0.9572260187294356, "grad_norm": 0.14520078897476196, "learning_rate": 1.6361095249430455e-05, "loss": 0.5124, "step": 3782 }, { "epoch": 0.9574791192103265, "grad_norm": 0.15192930400371552, "learning_rate": 1.63592476095662e-05, "loss": 0.5162, "step": 3783 }, { "epoch": 0.9577322196912175, "grad_norm": 0.14521363377571106, "learning_rate": 1.635739960513977e-05, "loss": 0.5255, "step": 3784 }, { "epoch": 0.9579853201721084, "grad_norm": 0.14702746272087097, "learning_rate": 1.6355551236257102e-05, "loss": 0.5122, "step": 3785 }, { "epoch": 0.9582384206529992, "grad_norm": 0.1460426151752472, "learning_rate": 1.6353702503024168e-05, "loss": 0.5111, "step": 3786 }, { "epoch": 0.9584915211338901, "grad_norm": 0.1451456993818283, "learning_rate": 1.6351853405546944e-05, "loss": 0.522, "step": 3787 }, { "epoch": 0.958744621614781, "grad_norm": 0.14988650381565094, "learning_rate": 1.635000394393144e-05, "loss": 0.5293, "step": 3788 }, { "epoch": 0.958997722095672, "grad_norm": 0.15334585309028625, "learning_rate": 1.634815411828368e-05, "loss": 0.5087, "step": 3789 }, { "epoch": 0.9592508225765629, "grad_norm": 0.14127138257026672, "learning_rate": 1.6346303928709706e-05, "loss": 0.5311, "step": 3790 }, { "epoch": 0.9595039230574538, "grad_norm": 0.1901031732559204, "learning_rate": 1.6344453375315593e-05, "loss": 0.5593, "step": 3791 }, { "epoch": 0.9597570235383447, "grad_norm": 0.15265797078609467, "learning_rate": 1.6342602458207427e-05, "loss": 0.519, "step": 3792 }, { "epoch": 0.9600101240192357, "grad_norm": 0.1488872468471527, "learning_rate": 1.6340751177491316e-05, "loss": 0.5418, "step": 3793 }, { "epoch": 0.9602632245001266, "grad_norm": 0.1467711478471756, "learning_rate": 1.6338899533273388e-05, "loss": 0.5083, "step": 3794 }, { "epoch": 0.9605163249810175, "grad_norm": 0.14361095428466797, "learning_rate": 1.6337047525659802e-05, "loss": 0.492, "step": 3795 }, { "epoch": 0.9607694254619084, "grad_norm": 0.1403983235359192, "learning_rate": 1.6335195154756716e-05, "loss": 0.4922, "step": 3796 }, { "epoch": 0.9610225259427992, "grad_norm": 0.14709924161434174, "learning_rate": 1.6333342420670338e-05, "loss": 0.5513, "step": 3797 }, { "epoch": 0.9612756264236902, "grad_norm": 0.14563478529453278, "learning_rate": 1.6331489323506868e-05, "loss": 0.525, "step": 3798 }, { "epoch": 0.9615287269045811, "grad_norm": 0.15184901654720306, "learning_rate": 1.6329635863372548e-05, "loss": 0.5041, "step": 3799 }, { "epoch": 0.961781827385472, "grad_norm": 0.14962069690227509, "learning_rate": 1.6327782040373626e-05, "loss": 0.5218, "step": 3800 }, { "epoch": 0.9620349278663629, "grad_norm": 0.14599859714508057, "learning_rate": 1.632592785461639e-05, "loss": 0.513, "step": 3801 }, { "epoch": 0.9622880283472539, "grad_norm": 0.14386872947216034, "learning_rate": 1.6324073306207125e-05, "loss": 0.5376, "step": 3802 }, { "epoch": 0.9625411288281448, "grad_norm": 0.14680960774421692, "learning_rate": 1.6322218395252153e-05, "loss": 0.5156, "step": 3803 }, { "epoch": 0.9627942293090357, "grad_norm": 0.15352913737297058, "learning_rate": 1.6320363121857808e-05, "loss": 0.5279, "step": 3804 }, { "epoch": 0.9630473297899266, "grad_norm": 0.16522404551506042, "learning_rate": 1.6318507486130456e-05, "loss": 0.5416, "step": 3805 }, { "epoch": 0.9633004302708175, "grad_norm": 0.15523165464401245, "learning_rate": 1.631665148817647e-05, "loss": 0.4957, "step": 3806 }, { "epoch": 0.9635535307517085, "grad_norm": 0.1665385216474533, "learning_rate": 1.6314795128102256e-05, "loss": 0.5234, "step": 3807 }, { "epoch": 0.9638066312325994, "grad_norm": 0.16250942647457123, "learning_rate": 1.631293840601423e-05, "loss": 0.554, "step": 3808 }, { "epoch": 0.9640597317134902, "grad_norm": 0.1459292620420456, "learning_rate": 1.631108132201884e-05, "loss": 0.5082, "step": 3809 }, { "epoch": 0.9643128321943811, "grad_norm": 0.1556709259748459, "learning_rate": 1.6309223876222545e-05, "loss": 0.5629, "step": 3810 }, { "epoch": 0.9645659326752721, "grad_norm": 0.15264128148555756, "learning_rate": 1.6307366068731827e-05, "loss": 0.5323, "step": 3811 }, { "epoch": 0.964819033156163, "grad_norm": 0.14854317903518677, "learning_rate": 1.6305507899653193e-05, "loss": 0.5327, "step": 3812 }, { "epoch": 0.9650721336370539, "grad_norm": 0.14538481831550598, "learning_rate": 1.6303649369093165e-05, "loss": 0.5338, "step": 3813 }, { "epoch": 0.9653252341179448, "grad_norm": 0.1563609093427658, "learning_rate": 1.6301790477158294e-05, "loss": 0.5166, "step": 3814 }, { "epoch": 0.9655783345988357, "grad_norm": 0.14705339074134827, "learning_rate": 1.6299931223955136e-05, "loss": 0.5347, "step": 3815 }, { "epoch": 0.9658314350797267, "grad_norm": 0.14251990616321564, "learning_rate": 1.6298071609590286e-05, "loss": 0.5251, "step": 3816 }, { "epoch": 0.9660845355606176, "grad_norm": 0.16012468934059143, "learning_rate": 1.6296211634170354e-05, "loss": 0.5513, "step": 3817 }, { "epoch": 0.9663376360415085, "grad_norm": 0.1472565084695816, "learning_rate": 1.629435129780196e-05, "loss": 0.5347, "step": 3818 }, { "epoch": 0.9665907365223994, "grad_norm": 0.14485855400562286, "learning_rate": 1.629249060059176e-05, "loss": 0.5403, "step": 3819 }, { "epoch": 0.9668438370032904, "grad_norm": 0.14992330968379974, "learning_rate": 1.629062954264643e-05, "loss": 0.5299, "step": 3820 }, { "epoch": 0.9670969374841812, "grad_norm": 0.15015843510627747, "learning_rate": 1.628876812407264e-05, "loss": 0.5461, "step": 3821 }, { "epoch": 0.9673500379650721, "grad_norm": 0.14781992137432098, "learning_rate": 1.628690634497712e-05, "loss": 0.5088, "step": 3822 }, { "epoch": 0.967603138445963, "grad_norm": 0.1436677873134613, "learning_rate": 1.6285044205466592e-05, "loss": 0.5124, "step": 3823 }, { "epoch": 0.9678562389268539, "grad_norm": 0.15176212787628174, "learning_rate": 1.6283181705647812e-05, "loss": 0.557, "step": 3824 }, { "epoch": 0.9681093394077449, "grad_norm": 0.14748024940490723, "learning_rate": 1.628131884562756e-05, "loss": 0.518, "step": 3825 }, { "epoch": 0.9683624398886358, "grad_norm": 0.15286625921726227, "learning_rate": 1.6279455625512614e-05, "loss": 0.503, "step": 3826 }, { "epoch": 0.9686155403695267, "grad_norm": 0.1510486900806427, "learning_rate": 1.6277592045409802e-05, "loss": 0.5352, "step": 3827 }, { "epoch": 0.9688686408504176, "grad_norm": 0.148910254240036, "learning_rate": 1.6275728105425953e-05, "loss": 0.5318, "step": 3828 }, { "epoch": 0.9691217413313086, "grad_norm": 0.14115232229232788, "learning_rate": 1.6273863805667923e-05, "loss": 0.5019, "step": 3829 }, { "epoch": 0.9693748418121995, "grad_norm": 0.16815748810768127, "learning_rate": 1.627199914624259e-05, "loss": 0.5088, "step": 3830 }, { "epoch": 0.9696279422930904, "grad_norm": 0.16133743524551392, "learning_rate": 1.6270134127256853e-05, "loss": 0.5311, "step": 3831 }, { "epoch": 0.9698810427739812, "grad_norm": 0.1418672502040863, "learning_rate": 1.626826874881762e-05, "loss": 0.512, "step": 3832 }, { "epoch": 0.9701341432548722, "grad_norm": 0.1418655812740326, "learning_rate": 1.6266403011031844e-05, "loss": 0.5298, "step": 3833 }, { "epoch": 0.9703872437357631, "grad_norm": 0.14854706823825836, "learning_rate": 1.6264536914006472e-05, "loss": 0.5426, "step": 3834 }, { "epoch": 0.970640344216654, "grad_norm": 0.15471938252449036, "learning_rate": 1.6262670457848488e-05, "loss": 0.5248, "step": 3835 }, { "epoch": 0.9708934446975449, "grad_norm": 0.1533239185810089, "learning_rate": 1.6260803642664893e-05, "loss": 0.5646, "step": 3836 }, { "epoch": 0.9711465451784358, "grad_norm": 0.14689037203788757, "learning_rate": 1.6258936468562702e-05, "loss": 0.5296, "step": 3837 }, { "epoch": 0.9713996456593268, "grad_norm": 0.1485079973936081, "learning_rate": 1.6257068935648965e-05, "loss": 0.5097, "step": 3838 }, { "epoch": 0.9716527461402177, "grad_norm": 0.15799418091773987, "learning_rate": 1.6255201044030734e-05, "loss": 0.5244, "step": 3839 }, { "epoch": 0.9719058466211086, "grad_norm": 0.1632402241230011, "learning_rate": 1.6253332793815097e-05, "loss": 0.5378, "step": 3840 }, { "epoch": 0.9721589471019995, "grad_norm": 0.14659827947616577, "learning_rate": 1.6251464185109157e-05, "loss": 0.511, "step": 3841 }, { "epoch": 0.9724120475828905, "grad_norm": 0.1553066223859787, "learning_rate": 1.6249595218020037e-05, "loss": 0.5075, "step": 3842 }, { "epoch": 0.9726651480637813, "grad_norm": 0.14816173911094666, "learning_rate": 1.624772589265488e-05, "loss": 0.5452, "step": 3843 }, { "epoch": 0.9729182485446722, "grad_norm": 0.14994974434375763, "learning_rate": 1.6245856209120847e-05, "loss": 0.5466, "step": 3844 }, { "epoch": 0.9731713490255631, "grad_norm": 0.14969618618488312, "learning_rate": 1.624398616752513e-05, "loss": 0.5011, "step": 3845 }, { "epoch": 0.973424449506454, "grad_norm": 0.14794595539569855, "learning_rate": 1.624211576797493e-05, "loss": 0.5262, "step": 3846 }, { "epoch": 0.973677549987345, "grad_norm": 0.15413470566272736, "learning_rate": 1.6240245010577474e-05, "loss": 0.5484, "step": 3847 }, { "epoch": 0.9739306504682359, "grad_norm": 0.14749734103679657, "learning_rate": 1.6238373895440006e-05, "loss": 0.5156, "step": 3848 }, { "epoch": 0.9741837509491268, "grad_norm": 0.14444613456726074, "learning_rate": 1.62365024226698e-05, "loss": 0.5126, "step": 3849 }, { "epoch": 0.9744368514300177, "grad_norm": 0.15196967124938965, "learning_rate": 1.623463059237414e-05, "loss": 0.5392, "step": 3850 }, { "epoch": 0.9746899519109087, "grad_norm": 0.1544530689716339, "learning_rate": 1.623275840466033e-05, "loss": 0.5233, "step": 3851 }, { "epoch": 0.9749430523917996, "grad_norm": 0.15481902658939362, "learning_rate": 1.6230885859635703e-05, "loss": 0.5226, "step": 3852 }, { "epoch": 0.9751961528726905, "grad_norm": 0.14608952403068542, "learning_rate": 1.6229012957407604e-05, "loss": 0.5013, "step": 3853 }, { "epoch": 0.9754492533535813, "grad_norm": 0.22510144114494324, "learning_rate": 1.622713969808341e-05, "loss": 0.5354, "step": 3854 }, { "epoch": 0.9757023538344722, "grad_norm": 0.14725567400455475, "learning_rate": 1.6225266081770503e-05, "loss": 0.5239, "step": 3855 }, { "epoch": 0.9759554543153632, "grad_norm": 0.15374629199504852, "learning_rate": 1.6223392108576298e-05, "loss": 0.543, "step": 3856 }, { "epoch": 0.9762085547962541, "grad_norm": 0.1542677879333496, "learning_rate": 1.6221517778608227e-05, "loss": 0.507, "step": 3857 }, { "epoch": 0.976461655277145, "grad_norm": 0.1502179205417633, "learning_rate": 1.621964309197374e-05, "loss": 0.5383, "step": 3858 }, { "epoch": 0.9767147557580359, "grad_norm": 0.14840199053287506, "learning_rate": 1.6217768048780304e-05, "loss": 0.5541, "step": 3859 }, { "epoch": 0.9769678562389269, "grad_norm": 0.16259582340717316, "learning_rate": 1.6215892649135412e-05, "loss": 0.5344, "step": 3860 }, { "epoch": 0.9772209567198178, "grad_norm": 0.1516430675983429, "learning_rate": 1.6214016893146584e-05, "loss": 0.5297, "step": 3861 }, { "epoch": 0.9774740572007087, "grad_norm": 0.18059854209423065, "learning_rate": 1.621214078092135e-05, "loss": 0.5492, "step": 3862 }, { "epoch": 0.9777271576815996, "grad_norm": 0.15057961642742157, "learning_rate": 1.621026431256726e-05, "loss": 0.5481, "step": 3863 }, { "epoch": 0.9779802581624905, "grad_norm": 0.14518338441848755, "learning_rate": 1.6208387488191895e-05, "loss": 0.5244, "step": 3864 }, { "epoch": 0.9782333586433815, "grad_norm": 0.15141542255878448, "learning_rate": 1.6206510307902838e-05, "loss": 0.4997, "step": 3865 }, { "epoch": 0.9784864591242723, "grad_norm": 0.15622003376483917, "learning_rate": 1.620463277180772e-05, "loss": 0.5277, "step": 3866 }, { "epoch": 0.9787395596051632, "grad_norm": 0.1454780101776123, "learning_rate": 1.6202754880014158e-05, "loss": 0.5247, "step": 3867 }, { "epoch": 0.9789926600860541, "grad_norm": 0.14858952164649963, "learning_rate": 1.620087663262982e-05, "loss": 0.5299, "step": 3868 }, { "epoch": 0.9792457605669451, "grad_norm": 0.14994552731513977, "learning_rate": 1.6198998029762376e-05, "loss": 0.5275, "step": 3869 }, { "epoch": 0.979498861047836, "grad_norm": 0.14378589391708374, "learning_rate": 1.6197119071519528e-05, "loss": 0.5253, "step": 3870 }, { "epoch": 0.9797519615287269, "grad_norm": 0.14868232607841492, "learning_rate": 1.6195239758008985e-05, "loss": 0.5236, "step": 3871 }, { "epoch": 0.9800050620096178, "grad_norm": 0.1519884616136551, "learning_rate": 1.6193360089338493e-05, "loss": 0.5475, "step": 3872 }, { "epoch": 0.9802581624905087, "grad_norm": 0.14714932441711426, "learning_rate": 1.6191480065615798e-05, "loss": 0.4973, "step": 3873 }, { "epoch": 0.9805112629713997, "grad_norm": 0.14794203639030457, "learning_rate": 1.6189599686948693e-05, "loss": 0.5031, "step": 3874 }, { "epoch": 0.9807643634522906, "grad_norm": 0.14940579235553741, "learning_rate": 1.618771895344496e-05, "loss": 0.5202, "step": 3875 }, { "epoch": 0.9810174639331815, "grad_norm": 0.1445947140455246, "learning_rate": 1.618583786521243e-05, "loss": 0.5114, "step": 3876 }, { "epoch": 0.9812705644140723, "grad_norm": 0.15065394341945648, "learning_rate": 1.6183956422358935e-05, "loss": 0.5115, "step": 3877 }, { "epoch": 0.9815236648949633, "grad_norm": 0.1555759757757187, "learning_rate": 1.6182074624992338e-05, "loss": 0.5206, "step": 3878 }, { "epoch": 0.9817767653758542, "grad_norm": 0.15297970175743103, "learning_rate": 1.6180192473220512e-05, "loss": 0.5322, "step": 3879 }, { "epoch": 0.9820298658567451, "grad_norm": 0.1459963321685791, "learning_rate": 1.6178309967151366e-05, "loss": 0.4921, "step": 3880 }, { "epoch": 0.982282966337636, "grad_norm": 0.14950400590896606, "learning_rate": 1.6176427106892814e-05, "loss": 0.5332, "step": 3881 }, { "epoch": 0.982536066818527, "grad_norm": 0.15379725396633148, "learning_rate": 1.6174543892552793e-05, "loss": 0.5238, "step": 3882 }, { "epoch": 0.9827891672994179, "grad_norm": 0.15259183943271637, "learning_rate": 1.6172660324239275e-05, "loss": 0.5376, "step": 3883 }, { "epoch": 0.9830422677803088, "grad_norm": 0.15693873167037964, "learning_rate": 1.617077640206023e-05, "loss": 0.5315, "step": 3884 }, { "epoch": 0.9832953682611997, "grad_norm": 0.1517692655324936, "learning_rate": 1.6168892126123665e-05, "loss": 0.5344, "step": 3885 }, { "epoch": 0.9835484687420906, "grad_norm": 0.14472903311252594, "learning_rate": 1.61670074965376e-05, "loss": 0.5075, "step": 3886 }, { "epoch": 0.9838015692229816, "grad_norm": 0.14296984672546387, "learning_rate": 1.616512251341008e-05, "loss": 0.5123, "step": 3887 }, { "epoch": 0.9840546697038725, "grad_norm": 0.14558005332946777, "learning_rate": 1.616323717684916e-05, "loss": 0.4849, "step": 3888 }, { "epoch": 0.9843077701847633, "grad_norm": 0.15538431704044342, "learning_rate": 1.6161351486962928e-05, "loss": 0.541, "step": 3889 }, { "epoch": 0.9845608706656542, "grad_norm": 0.15170827507972717, "learning_rate": 1.6159465443859482e-05, "loss": 0.5256, "step": 3890 }, { "epoch": 0.9848139711465452, "grad_norm": 0.14378522336483002, "learning_rate": 1.615757904764695e-05, "loss": 0.5343, "step": 3891 }, { "epoch": 0.9850670716274361, "grad_norm": 0.14715576171875, "learning_rate": 1.6155692298433475e-05, "loss": 0.528, "step": 3892 }, { "epoch": 0.985320172108327, "grad_norm": 0.1473788172006607, "learning_rate": 1.6153805196327216e-05, "loss": 0.532, "step": 3893 }, { "epoch": 0.9855732725892179, "grad_norm": 0.15451231598854065, "learning_rate": 1.615191774143636e-05, "loss": 0.5179, "step": 3894 }, { "epoch": 0.9858263730701088, "grad_norm": 0.14568965137004852, "learning_rate": 1.6150029933869107e-05, "loss": 0.5334, "step": 3895 }, { "epoch": 0.9860794735509998, "grad_norm": 0.15080852806568146, "learning_rate": 1.6148141773733685e-05, "loss": 0.5326, "step": 3896 }, { "epoch": 0.9863325740318907, "grad_norm": 0.14815910160541534, "learning_rate": 1.614625326113834e-05, "loss": 0.5167, "step": 3897 }, { "epoch": 0.9865856745127816, "grad_norm": 0.14249533414840698, "learning_rate": 1.614436439619133e-05, "loss": 0.4969, "step": 3898 }, { "epoch": 0.9868387749936725, "grad_norm": 0.1495925337076187, "learning_rate": 1.6142475179000943e-05, "loss": 0.5222, "step": 3899 }, { "epoch": 0.9870918754745635, "grad_norm": 0.14467386901378632, "learning_rate": 1.6140585609675486e-05, "loss": 0.5245, "step": 3900 }, { "epoch": 0.9873449759554543, "grad_norm": 0.15033195912837982, "learning_rate": 1.6138695688323278e-05, "loss": 0.5071, "step": 3901 }, { "epoch": 0.9875980764363452, "grad_norm": 0.14334291219711304, "learning_rate": 1.613680541505267e-05, "loss": 0.5415, "step": 3902 }, { "epoch": 0.9878511769172361, "grad_norm": 0.14398124814033508, "learning_rate": 1.6134914789972022e-05, "loss": 0.5135, "step": 3903 }, { "epoch": 0.988104277398127, "grad_norm": 0.15389041602611542, "learning_rate": 1.6133023813189728e-05, "loss": 0.5159, "step": 3904 }, { "epoch": 0.988357377879018, "grad_norm": 0.14809522032737732, "learning_rate": 1.6131132484814184e-05, "loss": 0.5142, "step": 3905 }, { "epoch": 0.9886104783599089, "grad_norm": 0.14882373809814453, "learning_rate": 1.6129240804953825e-05, "loss": 0.5294, "step": 3906 }, { "epoch": 0.9888635788407998, "grad_norm": 0.15086989104747772, "learning_rate": 1.6127348773717087e-05, "loss": 0.5456, "step": 3907 }, { "epoch": 0.9891166793216907, "grad_norm": 0.1454935073852539, "learning_rate": 1.6125456391212446e-05, "loss": 0.515, "step": 3908 }, { "epoch": 0.9893697798025817, "grad_norm": 0.14580386877059937, "learning_rate": 1.6123563657548382e-05, "loss": 0.4836, "step": 3909 }, { "epoch": 0.9896228802834726, "grad_norm": 0.1508018523454666, "learning_rate": 1.61216705728334e-05, "loss": 0.5305, "step": 3910 }, { "epoch": 0.9898759807643635, "grad_norm": 0.14905580878257751, "learning_rate": 1.6119777137176035e-05, "loss": 0.5172, "step": 3911 }, { "epoch": 0.9901290812452543, "grad_norm": 0.1432248204946518, "learning_rate": 1.6117883350684827e-05, "loss": 0.5242, "step": 3912 }, { "epoch": 0.9903821817261452, "grad_norm": 0.1491553783416748, "learning_rate": 1.6115989213468338e-05, "loss": 0.5177, "step": 3913 }, { "epoch": 0.9906352822070362, "grad_norm": 0.14771565794944763, "learning_rate": 1.6114094725635168e-05, "loss": 0.507, "step": 3914 }, { "epoch": 0.9908883826879271, "grad_norm": 0.15701542794704437, "learning_rate": 1.6112199887293916e-05, "loss": 0.5237, "step": 3915 }, { "epoch": 0.991141483168818, "grad_norm": 0.1535111665725708, "learning_rate": 1.6110304698553206e-05, "loss": 0.5408, "step": 3916 }, { "epoch": 0.9913945836497089, "grad_norm": 0.15653599798679352, "learning_rate": 1.6108409159521692e-05, "loss": 0.5303, "step": 3917 }, { "epoch": 0.9916476841305999, "grad_norm": 0.16198381781578064, "learning_rate": 1.610651327030804e-05, "loss": 0.5241, "step": 3918 }, { "epoch": 0.9919007846114908, "grad_norm": 0.14654488861560822, "learning_rate": 1.610461703102093e-05, "loss": 0.5249, "step": 3919 }, { "epoch": 0.9921538850923817, "grad_norm": 0.15079180896282196, "learning_rate": 1.6102720441769077e-05, "loss": 0.5479, "step": 3920 }, { "epoch": 0.9924069855732726, "grad_norm": 0.1462228149175644, "learning_rate": 1.610082350266121e-05, "loss": 0.5034, "step": 3921 }, { "epoch": 0.9926600860541634, "grad_norm": 0.1475261002779007, "learning_rate": 1.6098926213806068e-05, "loss": 0.5169, "step": 3922 }, { "epoch": 0.9929131865350544, "grad_norm": 0.1557222157716751, "learning_rate": 1.6097028575312427e-05, "loss": 0.501, "step": 3923 }, { "epoch": 0.9931662870159453, "grad_norm": 0.15464568138122559, "learning_rate": 1.609513058728907e-05, "loss": 0.5179, "step": 3924 }, { "epoch": 0.9934193874968362, "grad_norm": 0.14977793395519257, "learning_rate": 1.6093232249844807e-05, "loss": 0.5462, "step": 3925 }, { "epoch": 0.9936724879777271, "grad_norm": 0.14974834024906158, "learning_rate": 1.6091333563088462e-05, "loss": 0.5274, "step": 3926 }, { "epoch": 0.9939255884586181, "grad_norm": 0.15120790898799896, "learning_rate": 1.6089434527128886e-05, "loss": 0.5097, "step": 3927 }, { "epoch": 0.994178688939509, "grad_norm": 0.15376362204551697, "learning_rate": 1.6087535142074948e-05, "loss": 0.5162, "step": 3928 }, { "epoch": 0.9944317894203999, "grad_norm": 0.14804089069366455, "learning_rate": 1.608563540803553e-05, "loss": 0.5307, "step": 3929 }, { "epoch": 0.9946848899012908, "grad_norm": 0.1548173874616623, "learning_rate": 1.6083735325119545e-05, "loss": 0.5015, "step": 3930 }, { "epoch": 0.9949379903821818, "grad_norm": 0.16197916865348816, "learning_rate": 1.6081834893435918e-05, "loss": 0.5291, "step": 3931 }, { "epoch": 0.9951910908630727, "grad_norm": 0.155508354306221, "learning_rate": 1.6079934113093598e-05, "loss": 0.5499, "step": 3932 }, { "epoch": 0.9954441913439636, "grad_norm": 0.14825887978076935, "learning_rate": 1.6078032984201553e-05, "loss": 0.5146, "step": 3933 }, { "epoch": 0.9956972918248544, "grad_norm": 0.15199661254882812, "learning_rate": 1.607613150686877e-05, "loss": 0.5164, "step": 3934 }, { "epoch": 0.9959503923057453, "grad_norm": 0.1496342420578003, "learning_rate": 1.6074229681204254e-05, "loss": 0.5235, "step": 3935 }, { "epoch": 0.9962034927866363, "grad_norm": 0.15322662889957428, "learning_rate": 1.6072327507317037e-05, "loss": 0.536, "step": 3936 }, { "epoch": 0.9964565932675272, "grad_norm": 0.1493048220872879, "learning_rate": 1.6070424985316165e-05, "loss": 0.5078, "step": 3937 }, { "epoch": 0.9967096937484181, "grad_norm": 0.14688515663146973, "learning_rate": 1.6068522115310705e-05, "loss": 0.5198, "step": 3938 }, { "epoch": 0.996962794229309, "grad_norm": 0.14200732111930847, "learning_rate": 1.6066618897409746e-05, "loss": 0.5224, "step": 3939 }, { "epoch": 0.9972158947102, "grad_norm": 0.14864256978034973, "learning_rate": 1.6064715331722395e-05, "loss": 0.5359, "step": 3940 }, { "epoch": 0.9974689951910909, "grad_norm": 0.1501983106136322, "learning_rate": 1.6062811418357777e-05, "loss": 0.4984, "step": 3941 }, { "epoch": 0.9977220956719818, "grad_norm": 0.15095578134059906, "learning_rate": 1.6060907157425044e-05, "loss": 0.5167, "step": 3942 }, { "epoch": 0.9979751961528727, "grad_norm": 0.1415361911058426, "learning_rate": 1.6059002549033355e-05, "loss": 0.5057, "step": 3943 }, { "epoch": 0.9982282966337636, "grad_norm": 0.14795713126659393, "learning_rate": 1.605709759329191e-05, "loss": 0.5122, "step": 3944 }, { "epoch": 0.9984813971146546, "grad_norm": 0.162861630320549, "learning_rate": 1.6055192290309904e-05, "loss": 0.5262, "step": 3945 }, { "epoch": 0.9987344975955454, "grad_norm": 0.16867463290691376, "learning_rate": 1.6053286640196567e-05, "loss": 0.5071, "step": 3946 }, { "epoch": 0.9989875980764363, "grad_norm": 0.14524881541728973, "learning_rate": 1.605138064306115e-05, "loss": 0.4979, "step": 3947 }, { "epoch": 0.9992406985573272, "grad_norm": 0.1482476443052292, "learning_rate": 1.6049474299012915e-05, "loss": 0.5309, "step": 3948 }, { "epoch": 0.9994937990382182, "grad_norm": 0.15403158962726593, "learning_rate": 1.6047567608161154e-05, "loss": 0.5286, "step": 3949 }, { "epoch": 0.9997468995191091, "grad_norm": 0.14373505115509033, "learning_rate": 1.6045660570615168e-05, "loss": 0.5067, "step": 3950 }, { "epoch": 1.0, "grad_norm": 0.1479228436946869, "learning_rate": 1.6043753186484287e-05, "loss": 0.5169, "step": 3951 }, { "epoch": 1.0, "eval_loss": 0.802573561668396, "eval_runtime": 1052.193, "eval_samples_per_second": 40.41, "eval_steps_per_second": 0.632, "step": 3951 }, { "epoch": 1.000253100480891, "grad_norm": 0.15048058331012726, "learning_rate": 1.6041845455877854e-05, "loss": 0.5343, "step": 3952 }, { "epoch": 1.0005062009617818, "grad_norm": 0.16292202472686768, "learning_rate": 1.603993737890524e-05, "loss": 0.54, "step": 3953 }, { "epoch": 1.0007593014426728, "grad_norm": 0.14469090104103088, "learning_rate": 1.6038028955675824e-05, "loss": 0.5092, "step": 3954 }, { "epoch": 1.0010124019235636, "grad_norm": 0.14573781192302704, "learning_rate": 1.603612018629902e-05, "loss": 0.5223, "step": 3955 }, { "epoch": 1.0012655024044546, "grad_norm": 0.1446424424648285, "learning_rate": 1.603421107088425e-05, "loss": 0.5328, "step": 3956 }, { "epoch": 1.0015186028853456, "grad_norm": 0.14097891747951508, "learning_rate": 1.6032301609540957e-05, "loss": 0.5173, "step": 3957 }, { "epoch": 1.0017717033662363, "grad_norm": 0.14679549634456635, "learning_rate": 1.603039180237861e-05, "loss": 0.5259, "step": 3958 }, { "epoch": 1.0020248038471273, "grad_norm": 0.1723938137292862, "learning_rate": 1.6028481649506692e-05, "loss": 0.5322, "step": 3959 }, { "epoch": 1.0022779043280183, "grad_norm": 0.1614323854446411, "learning_rate": 1.602657115103471e-05, "loss": 0.5351, "step": 3960 }, { "epoch": 1.002531004808909, "grad_norm": 0.14451397955417633, "learning_rate": 1.6024660307072188e-05, "loss": 0.5193, "step": 3961 }, { "epoch": 1.0027841052898, "grad_norm": 0.1448938548564911, "learning_rate": 1.6022749117728668e-05, "loss": 0.5132, "step": 3962 }, { "epoch": 1.0030372057706909, "grad_norm": 0.15474946796894073, "learning_rate": 1.602083758311372e-05, "loss": 0.5312, "step": 3963 }, { "epoch": 1.0032903062515819, "grad_norm": 0.15194155275821686, "learning_rate": 1.6018925703336923e-05, "loss": 0.5263, "step": 3964 }, { "epoch": 1.0035434067324729, "grad_norm": 0.1465476155281067, "learning_rate": 1.6017013478507884e-05, "loss": 0.4965, "step": 3965 }, { "epoch": 1.0037965072133637, "grad_norm": 0.1407623142004013, "learning_rate": 1.6015100908736226e-05, "loss": 0.5423, "step": 3966 }, { "epoch": 1.0040496076942547, "grad_norm": 0.15105776488780975, "learning_rate": 1.601318799413159e-05, "loss": 0.5107, "step": 3967 }, { "epoch": 1.0043027081751454, "grad_norm": 0.1427195519208908, "learning_rate": 1.6011274734803642e-05, "loss": 0.5216, "step": 3968 }, { "epoch": 1.0045558086560364, "grad_norm": 0.14620138704776764, "learning_rate": 1.600936113086207e-05, "loss": 0.5065, "step": 3969 }, { "epoch": 1.0048089091369274, "grad_norm": 0.15167401731014252, "learning_rate": 1.6007447182416565e-05, "loss": 0.5243, "step": 3970 }, { "epoch": 1.0050620096178182, "grad_norm": 0.14876510202884674, "learning_rate": 1.600553288957686e-05, "loss": 0.5155, "step": 3971 }, { "epoch": 1.0053151100987092, "grad_norm": 0.15090231597423553, "learning_rate": 1.6003618252452694e-05, "loss": 0.5178, "step": 3972 }, { "epoch": 1.0055682105796, "grad_norm": 0.14906059205532074, "learning_rate": 1.6001703271153828e-05, "loss": 0.5163, "step": 3973 }, { "epoch": 1.005821311060491, "grad_norm": 0.15658830106258392, "learning_rate": 1.5999787945790043e-05, "loss": 0.5257, "step": 3974 }, { "epoch": 1.006074411541382, "grad_norm": 0.14758512377738953, "learning_rate": 1.5997872276471146e-05, "loss": 0.5253, "step": 3975 }, { "epoch": 1.0063275120222728, "grad_norm": 0.144147127866745, "learning_rate": 1.5995956263306948e-05, "loss": 0.4999, "step": 3976 }, { "epoch": 1.0065806125031638, "grad_norm": 0.1440252810716629, "learning_rate": 1.59940399064073e-05, "loss": 0.5185, "step": 3977 }, { "epoch": 1.0068337129840548, "grad_norm": 0.14704982936382294, "learning_rate": 1.5992123205882063e-05, "loss": 0.5029, "step": 3978 }, { "epoch": 1.0070868134649456, "grad_norm": 0.14515119791030884, "learning_rate": 1.5990206161841106e-05, "loss": 0.4966, "step": 3979 }, { "epoch": 1.0073399139458366, "grad_norm": 0.14842024445533752, "learning_rate": 1.5988288774394344e-05, "loss": 0.5118, "step": 3980 }, { "epoch": 1.0075930144267273, "grad_norm": 0.15083995461463928, "learning_rate": 1.5986371043651684e-05, "loss": 0.5271, "step": 3981 }, { "epoch": 1.0078461149076183, "grad_norm": 0.14858633279800415, "learning_rate": 1.598445296972307e-05, "loss": 0.5253, "step": 3982 }, { "epoch": 1.0080992153885093, "grad_norm": 0.14761056005954742, "learning_rate": 1.5982534552718463e-05, "loss": 0.5417, "step": 3983 }, { "epoch": 1.0083523158694, "grad_norm": 0.1481616497039795, "learning_rate": 1.598061579274784e-05, "loss": 0.515, "step": 3984 }, { "epoch": 1.008605416350291, "grad_norm": 0.15625053644180298, "learning_rate": 1.59786966899212e-05, "loss": 0.5365, "step": 3985 }, { "epoch": 1.0088585168311819, "grad_norm": 0.1502244919538498, "learning_rate": 1.597677724434856e-05, "loss": 0.5226, "step": 3986 }, { "epoch": 1.0091116173120729, "grad_norm": 0.14703486859798431, "learning_rate": 1.5974857456139962e-05, "loss": 0.5028, "step": 3987 }, { "epoch": 1.0093647177929639, "grad_norm": 0.1475391983985901, "learning_rate": 1.597293732540546e-05, "loss": 0.5099, "step": 3988 }, { "epoch": 1.0096178182738547, "grad_norm": 0.14941337704658508, "learning_rate": 1.597101685225513e-05, "loss": 0.5357, "step": 3989 }, { "epoch": 1.0098709187547457, "grad_norm": 0.1445443034172058, "learning_rate": 1.5969096036799067e-05, "loss": 0.5017, "step": 3990 }, { "epoch": 1.0101240192356364, "grad_norm": 0.14427700638771057, "learning_rate": 1.5967174879147392e-05, "loss": 0.4951, "step": 3991 }, { "epoch": 1.0103771197165274, "grad_norm": 0.16891737282276154, "learning_rate": 1.5965253379410244e-05, "loss": 0.5337, "step": 3992 }, { "epoch": 1.0106302201974184, "grad_norm": 0.1481829285621643, "learning_rate": 1.5963331537697767e-05, "loss": 0.5174, "step": 3993 }, { "epoch": 1.0108833206783092, "grad_norm": 0.1455804854631424, "learning_rate": 1.596140935412015e-05, "loss": 0.4959, "step": 3994 }, { "epoch": 1.0111364211592002, "grad_norm": 0.1514011174440384, "learning_rate": 1.595948682878757e-05, "loss": 0.5391, "step": 3995 }, { "epoch": 1.0113895216400912, "grad_norm": 0.1498788446187973, "learning_rate": 1.595756396181026e-05, "loss": 0.5202, "step": 3996 }, { "epoch": 1.011642622120982, "grad_norm": 0.14775054156780243, "learning_rate": 1.595564075329844e-05, "loss": 0.5193, "step": 3997 }, { "epoch": 1.011895722601873, "grad_norm": 0.14672809839248657, "learning_rate": 1.595371720336237e-05, "loss": 0.5214, "step": 3998 }, { "epoch": 1.0121488230827638, "grad_norm": 0.149889275431633, "learning_rate": 1.5951793312112328e-05, "loss": 0.5047, "step": 3999 }, { "epoch": 1.0124019235636548, "grad_norm": 0.145270437002182, "learning_rate": 1.5949869079658597e-05, "loss": 0.4949, "step": 4000 }, { "epoch": 1.0126550240445458, "grad_norm": 0.14930623769760132, "learning_rate": 1.5947944506111487e-05, "loss": 0.5128, "step": 4001 }, { "epoch": 1.0129081245254365, "grad_norm": 0.14835262298583984, "learning_rate": 1.5946019591581344e-05, "loss": 0.5081, "step": 4002 }, { "epoch": 1.0131612250063275, "grad_norm": 0.18459531664848328, "learning_rate": 1.5944094336178506e-05, "loss": 0.5309, "step": 4003 }, { "epoch": 1.0134143254872183, "grad_norm": 0.15518943965435028, "learning_rate": 1.5942168740013347e-05, "loss": 0.5437, "step": 4004 }, { "epoch": 1.0136674259681093, "grad_norm": 0.14393088221549988, "learning_rate": 1.5940242803196263e-05, "loss": 0.535, "step": 4005 }, { "epoch": 1.0139205264490003, "grad_norm": 0.14458800852298737, "learning_rate": 1.593831652583766e-05, "loss": 0.5187, "step": 4006 }, { "epoch": 1.014173626929891, "grad_norm": 0.14523963630199432, "learning_rate": 1.5936389908047965e-05, "loss": 0.5329, "step": 4007 }, { "epoch": 1.014426727410782, "grad_norm": 0.14631815254688263, "learning_rate": 1.593446294993763e-05, "loss": 0.5129, "step": 4008 }, { "epoch": 1.014679827891673, "grad_norm": 0.14593419432640076, "learning_rate": 1.593253565161712e-05, "loss": 0.5125, "step": 4009 }, { "epoch": 1.0149329283725639, "grad_norm": 0.1460360288619995, "learning_rate": 1.5930608013196927e-05, "loss": 0.5022, "step": 4010 }, { "epoch": 1.0151860288534549, "grad_norm": 0.15701471269130707, "learning_rate": 1.592868003478756e-05, "loss": 0.5241, "step": 4011 }, { "epoch": 1.0154391293343457, "grad_norm": 0.1454389989376068, "learning_rate": 1.5926751716499536e-05, "loss": 0.5042, "step": 4012 }, { "epoch": 1.0156922298152367, "grad_norm": 0.1508450210094452, "learning_rate": 1.5924823058443413e-05, "loss": 0.5002, "step": 4013 }, { "epoch": 1.0159453302961277, "grad_norm": 0.1607152670621872, "learning_rate": 1.5922894060729755e-05, "loss": 0.5169, "step": 4014 }, { "epoch": 1.0161984307770184, "grad_norm": 0.14628514647483826, "learning_rate": 1.592096472346914e-05, "loss": 0.5087, "step": 4015 }, { "epoch": 1.0164515312579094, "grad_norm": 0.14681223034858704, "learning_rate": 1.5919035046772177e-05, "loss": 0.4981, "step": 4016 }, { "epoch": 1.0167046317388002, "grad_norm": 0.1476987600326538, "learning_rate": 1.5917105030749492e-05, "loss": 0.5245, "step": 4017 }, { "epoch": 1.0169577322196912, "grad_norm": 0.15117384493350983, "learning_rate": 1.5915174675511725e-05, "loss": 0.5298, "step": 4018 }, { "epoch": 1.0172108327005822, "grad_norm": 0.14656352996826172, "learning_rate": 1.591324398116955e-05, "loss": 0.5171, "step": 4019 }, { "epoch": 1.017463933181473, "grad_norm": 0.15204362571239471, "learning_rate": 1.591131294783363e-05, "loss": 0.5022, "step": 4020 }, { "epoch": 1.017717033662364, "grad_norm": 0.1478562206029892, "learning_rate": 1.5909381575614682e-05, "loss": 0.4988, "step": 4021 }, { "epoch": 1.0179701341432548, "grad_norm": 0.1505058854818344, "learning_rate": 1.5907449864623426e-05, "loss": 0.5223, "step": 4022 }, { "epoch": 1.0182232346241458, "grad_norm": 0.1501818597316742, "learning_rate": 1.5905517814970597e-05, "loss": 0.4854, "step": 4023 }, { "epoch": 1.0184763351050368, "grad_norm": 0.14994163811206818, "learning_rate": 1.590358542676696e-05, "loss": 0.5081, "step": 4024 }, { "epoch": 1.0187294355859275, "grad_norm": 0.14561603963375092, "learning_rate": 1.5901652700123295e-05, "loss": 0.4968, "step": 4025 }, { "epoch": 1.0189825360668185, "grad_norm": 0.14379946887493134, "learning_rate": 1.589971963515039e-05, "loss": 0.5127, "step": 4026 }, { "epoch": 1.0192356365477095, "grad_norm": 0.14859431982040405, "learning_rate": 1.5897786231959085e-05, "loss": 0.5109, "step": 4027 }, { "epoch": 1.0194887370286003, "grad_norm": 0.14974714815616608, "learning_rate": 1.5895852490660203e-05, "loss": 0.5118, "step": 4028 }, { "epoch": 1.0197418375094913, "grad_norm": 0.15788091719150543, "learning_rate": 1.5893918411364603e-05, "loss": 0.5074, "step": 4029 }, { "epoch": 1.019994937990382, "grad_norm": 0.15516100823879242, "learning_rate": 1.5891983994183163e-05, "loss": 0.5087, "step": 4030 }, { "epoch": 1.020248038471273, "grad_norm": 0.1582939624786377, "learning_rate": 1.5890049239226776e-05, "loss": 0.5066, "step": 4031 }, { "epoch": 1.020501138952164, "grad_norm": 0.1454380303621292, "learning_rate": 1.5888114146606363e-05, "loss": 0.5003, "step": 4032 }, { "epoch": 1.0207542394330549, "grad_norm": 0.1489076018333435, "learning_rate": 1.5886178716432857e-05, "loss": 0.5188, "step": 4033 }, { "epoch": 1.0210073399139459, "grad_norm": 0.16033610701560974, "learning_rate": 1.588424294881721e-05, "loss": 0.5158, "step": 4034 }, { "epoch": 1.0212604403948367, "grad_norm": 0.1460811048746109, "learning_rate": 1.58823068438704e-05, "loss": 0.5493, "step": 4035 }, { "epoch": 1.0215135408757277, "grad_norm": 0.1477465182542801, "learning_rate": 1.5880370401703413e-05, "loss": 0.5155, "step": 4036 }, { "epoch": 1.0217666413566187, "grad_norm": 0.14531001448631287, "learning_rate": 1.5878433622427264e-05, "loss": 0.4846, "step": 4037 }, { "epoch": 1.0220197418375094, "grad_norm": 0.15506970882415771, "learning_rate": 1.5876496506152986e-05, "loss": 0.5411, "step": 4038 }, { "epoch": 1.0222728423184004, "grad_norm": 0.1446245014667511, "learning_rate": 1.587455905299163e-05, "loss": 0.5098, "step": 4039 }, { "epoch": 1.0225259427992912, "grad_norm": 0.142917662858963, "learning_rate": 1.5872621263054263e-05, "loss": 0.4929, "step": 4040 }, { "epoch": 1.0227790432801822, "grad_norm": 0.14697298407554626, "learning_rate": 1.587068313645198e-05, "loss": 0.4819, "step": 4041 }, { "epoch": 1.0230321437610732, "grad_norm": 0.1473260521888733, "learning_rate": 1.586874467329588e-05, "loss": 0.4932, "step": 4042 }, { "epoch": 1.023285244241964, "grad_norm": 0.1494176834821701, "learning_rate": 1.5866805873697103e-05, "loss": 0.5233, "step": 4043 }, { "epoch": 1.023538344722855, "grad_norm": 0.1498187631368637, "learning_rate": 1.5864866737766786e-05, "loss": 0.5054, "step": 4044 }, { "epoch": 1.023791445203746, "grad_norm": 0.14878883957862854, "learning_rate": 1.58629272656161e-05, "loss": 0.5311, "step": 4045 }, { "epoch": 1.0240445456846368, "grad_norm": 0.1482565850019455, "learning_rate": 1.586098745735623e-05, "loss": 0.477, "step": 4046 }, { "epoch": 1.0242976461655278, "grad_norm": 0.14399349689483643, "learning_rate": 1.5859047313098384e-05, "loss": 0.5009, "step": 4047 }, { "epoch": 1.0245507466464185, "grad_norm": 0.14856289327144623, "learning_rate": 1.5857106832953783e-05, "loss": 0.5055, "step": 4048 }, { "epoch": 1.0248038471273095, "grad_norm": 0.14767755568027496, "learning_rate": 1.585516601703367e-05, "loss": 0.5137, "step": 4049 }, { "epoch": 1.0250569476082005, "grad_norm": 0.14708256721496582, "learning_rate": 1.5853224865449312e-05, "loss": 0.4973, "step": 4050 }, { "epoch": 1.0253100480890913, "grad_norm": 0.1508316546678543, "learning_rate": 1.5851283378311987e-05, "loss": 0.498, "step": 4051 }, { "epoch": 1.0255631485699823, "grad_norm": 0.1524588018655777, "learning_rate": 1.5849341555733004e-05, "loss": 0.5089, "step": 4052 }, { "epoch": 1.025816249050873, "grad_norm": 0.1526605188846588, "learning_rate": 1.5847399397823668e-05, "loss": 0.5383, "step": 4053 }, { "epoch": 1.026069349531764, "grad_norm": 0.14852261543273926, "learning_rate": 1.5845456904695334e-05, "loss": 0.5421, "step": 4054 }, { "epoch": 1.026322450012655, "grad_norm": 0.14604701101779938, "learning_rate": 1.5843514076459353e-05, "loss": 0.4856, "step": 4055 }, { "epoch": 1.0265755504935459, "grad_norm": 0.20727510750293732, "learning_rate": 1.5841570913227107e-05, "loss": 0.4919, "step": 4056 }, { "epoch": 1.0268286509744369, "grad_norm": 0.14798125624656677, "learning_rate": 1.583962741510999e-05, "loss": 0.5219, "step": 4057 }, { "epoch": 1.0270817514553279, "grad_norm": 0.14734314382076263, "learning_rate": 1.5837683582219422e-05, "loss": 0.4906, "step": 4058 }, { "epoch": 1.0273348519362187, "grad_norm": 0.14707981050014496, "learning_rate": 1.5835739414666838e-05, "loss": 0.5146, "step": 4059 }, { "epoch": 1.0275879524171097, "grad_norm": 0.14466910064220428, "learning_rate": 1.583379491256369e-05, "loss": 0.5057, "step": 4060 }, { "epoch": 1.0278410528980004, "grad_norm": 0.14874565601348877, "learning_rate": 1.5831850076021454e-05, "loss": 0.5272, "step": 4061 }, { "epoch": 1.0280941533788914, "grad_norm": 0.14744755625724792, "learning_rate": 1.582990490515163e-05, "loss": 0.4988, "step": 4062 }, { "epoch": 1.0283472538597824, "grad_norm": 0.14657118916511536, "learning_rate": 1.582795940006572e-05, "loss": 0.478, "step": 4063 }, { "epoch": 1.0286003543406732, "grad_norm": 0.14777787029743195, "learning_rate": 1.582601356087526e-05, "loss": 0.491, "step": 4064 }, { "epoch": 1.0288534548215642, "grad_norm": 0.15586577355861664, "learning_rate": 1.58240673876918e-05, "loss": 0.5372, "step": 4065 }, { "epoch": 1.029106555302455, "grad_norm": 0.14716537296772003, "learning_rate": 1.5822120880626915e-05, "loss": 0.4803, "step": 4066 }, { "epoch": 1.029359655783346, "grad_norm": 0.14855928719043732, "learning_rate": 1.5820174039792185e-05, "loss": 0.4985, "step": 4067 }, { "epoch": 1.029612756264237, "grad_norm": 0.15508636832237244, "learning_rate": 1.5818226865299228e-05, "loss": 0.4873, "step": 4068 }, { "epoch": 1.0298658567451278, "grad_norm": 0.16332915425300598, "learning_rate": 1.581627935725966e-05, "loss": 0.515, "step": 4069 }, { "epoch": 1.0301189572260188, "grad_norm": 0.14844995737075806, "learning_rate": 1.5814331515785138e-05, "loss": 0.4643, "step": 4070 }, { "epoch": 1.0303720577069095, "grad_norm": 0.14485201239585876, "learning_rate": 1.5812383340987328e-05, "loss": 0.505, "step": 4071 }, { "epoch": 1.0306251581878005, "grad_norm": 0.1530630886554718, "learning_rate": 1.5810434832977906e-05, "loss": 0.5308, "step": 4072 }, { "epoch": 1.0308782586686915, "grad_norm": 0.1502642184495926, "learning_rate": 1.580848599186858e-05, "loss": 0.516, "step": 4073 }, { "epoch": 1.0311313591495823, "grad_norm": 0.15724213421344757, "learning_rate": 1.5806536817771076e-05, "loss": 0.5066, "step": 4074 }, { "epoch": 1.0313844596304733, "grad_norm": 0.14878012239933014, "learning_rate": 1.5804587310797133e-05, "loss": 0.5343, "step": 4075 }, { "epoch": 1.0316375601113643, "grad_norm": 0.15363094210624695, "learning_rate": 1.580263747105851e-05, "loss": 0.5308, "step": 4076 }, { "epoch": 1.031890660592255, "grad_norm": 0.17660903930664062, "learning_rate": 1.5800687298666992e-05, "loss": 0.5179, "step": 4077 }, { "epoch": 1.032143761073146, "grad_norm": 0.1510571539402008, "learning_rate": 1.579873679373437e-05, "loss": 0.5065, "step": 4078 }, { "epoch": 1.0323968615540369, "grad_norm": 0.1503949761390686, "learning_rate": 1.5796785956372476e-05, "loss": 0.5103, "step": 4079 }, { "epoch": 1.0326499620349279, "grad_norm": 0.16240382194519043, "learning_rate": 1.579483478669314e-05, "loss": 0.5158, "step": 4080 }, { "epoch": 1.0329030625158189, "grad_norm": 0.14559580385684967, "learning_rate": 1.579288328480821e-05, "loss": 0.4869, "step": 4081 }, { "epoch": 1.0331561629967096, "grad_norm": 0.15265142917633057, "learning_rate": 1.5790931450829576e-05, "loss": 0.4771, "step": 4082 }, { "epoch": 1.0334092634776006, "grad_norm": 0.14774282276630402, "learning_rate": 1.5788979284869123e-05, "loss": 0.5345, "step": 4083 }, { "epoch": 1.0336623639584914, "grad_norm": 0.1448781043291092, "learning_rate": 1.578702678703877e-05, "loss": 0.5071, "step": 4084 }, { "epoch": 1.0339154644393824, "grad_norm": 0.149511456489563, "learning_rate": 1.5785073957450443e-05, "loss": 0.4941, "step": 4085 }, { "epoch": 1.0341685649202734, "grad_norm": 0.14339981973171234, "learning_rate": 1.57831207962161e-05, "loss": 0.5044, "step": 4086 }, { "epoch": 1.0344216654011642, "grad_norm": 0.153433695435524, "learning_rate": 1.5781167303447707e-05, "loss": 0.5216, "step": 4087 }, { "epoch": 1.0346747658820552, "grad_norm": 0.15680471062660217, "learning_rate": 1.5779213479257257e-05, "loss": 0.4983, "step": 4088 }, { "epoch": 1.034927866362946, "grad_norm": 0.1512002795934677, "learning_rate": 1.577725932375676e-05, "loss": 0.5029, "step": 4089 }, { "epoch": 1.035180966843837, "grad_norm": 0.15002396702766418, "learning_rate": 1.577530483705824e-05, "loss": 0.5093, "step": 4090 }, { "epoch": 1.035434067324728, "grad_norm": 0.1991529017686844, "learning_rate": 1.5773350019273744e-05, "loss": 0.5364, "step": 4091 }, { "epoch": 1.0356871678056188, "grad_norm": 0.15157683193683624, "learning_rate": 1.5771394870515336e-05, "loss": 0.4868, "step": 4092 }, { "epoch": 1.0359402682865098, "grad_norm": 0.1445728838443756, "learning_rate": 1.5769439390895102e-05, "loss": 0.492, "step": 4093 }, { "epoch": 1.0361933687674008, "grad_norm": 0.15292096138000488, "learning_rate": 1.576748358052515e-05, "loss": 0.554, "step": 4094 }, { "epoch": 1.0364464692482915, "grad_norm": 0.14507927000522614, "learning_rate": 1.5765527439517597e-05, "loss": 0.4691, "step": 4095 }, { "epoch": 1.0366995697291825, "grad_norm": 0.16536445915699005, "learning_rate": 1.5763570967984584e-05, "loss": 0.4985, "step": 4096 }, { "epoch": 1.0369526702100733, "grad_norm": 0.15084581077098846, "learning_rate": 1.5761614166038278e-05, "loss": 0.4966, "step": 4097 }, { "epoch": 1.0372057706909643, "grad_norm": 0.14399930834770203, "learning_rate": 1.5759657033790848e-05, "loss": 0.4898, "step": 4098 }, { "epoch": 1.0374588711718553, "grad_norm": 0.1572646051645279, "learning_rate": 1.57576995713545e-05, "loss": 0.5117, "step": 4099 }, { "epoch": 1.037711971652746, "grad_norm": 0.1451658457517624, "learning_rate": 1.575574177884145e-05, "loss": 0.4973, "step": 4100 }, { "epoch": 1.037965072133637, "grad_norm": 0.14905822277069092, "learning_rate": 1.5753783656363935e-05, "loss": 0.4737, "step": 4101 }, { "epoch": 1.0382181726145279, "grad_norm": 0.1622321754693985, "learning_rate": 1.5751825204034207e-05, "loss": 0.5186, "step": 4102 }, { "epoch": 1.0384712730954189, "grad_norm": 0.22775375843048096, "learning_rate": 1.574986642196454e-05, "loss": 0.4779, "step": 4103 }, { "epoch": 1.0387243735763099, "grad_norm": 0.1466921865940094, "learning_rate": 1.5747907310267225e-05, "loss": 0.5195, "step": 4104 }, { "epoch": 1.0389774740572006, "grad_norm": 0.14714843034744263, "learning_rate": 1.574594786905458e-05, "loss": 0.502, "step": 4105 }, { "epoch": 1.0392305745380916, "grad_norm": 0.14759936928749084, "learning_rate": 1.5743988098438934e-05, "loss": 0.5389, "step": 4106 }, { "epoch": 1.0394836750189826, "grad_norm": 0.148715078830719, "learning_rate": 1.574202799853263e-05, "loss": 0.5208, "step": 4107 }, { "epoch": 1.0397367754998734, "grad_norm": 0.14986270666122437, "learning_rate": 1.5740067569448045e-05, "loss": 0.5074, "step": 4108 }, { "epoch": 1.0399898759807644, "grad_norm": 0.15017493069171906, "learning_rate": 1.573810681129756e-05, "loss": 0.5137, "step": 4109 }, { "epoch": 1.0402429764616552, "grad_norm": 0.1507161259651184, "learning_rate": 1.5736145724193583e-05, "loss": 0.5541, "step": 4110 }, { "epoch": 1.0404960769425462, "grad_norm": 0.16290892660617828, "learning_rate": 1.5734184308248543e-05, "loss": 0.5229, "step": 4111 }, { "epoch": 1.0407491774234372, "grad_norm": 0.1546194702386856, "learning_rate": 1.5732222563574878e-05, "loss": 0.5088, "step": 4112 }, { "epoch": 1.041002277904328, "grad_norm": 0.14363987743854523, "learning_rate": 1.5730260490285053e-05, "loss": 0.4811, "step": 4113 }, { "epoch": 1.041255378385219, "grad_norm": 0.1470114141702652, "learning_rate": 1.572829808849155e-05, "loss": 0.5014, "step": 4114 }, { "epoch": 1.0415084788661098, "grad_norm": 0.15474075078964233, "learning_rate": 1.5726335358306864e-05, "loss": 0.5136, "step": 4115 }, { "epoch": 1.0417615793470008, "grad_norm": 0.15136638283729553, "learning_rate": 1.5724372299843526e-05, "loss": 0.5125, "step": 4116 }, { "epoch": 1.0420146798278918, "grad_norm": 0.14689023792743683, "learning_rate": 1.5722408913214063e-05, "loss": 0.4836, "step": 4117 }, { "epoch": 1.0422677803087825, "grad_norm": 0.1429336667060852, "learning_rate": 1.5720445198531034e-05, "loss": 0.4787, "step": 4118 }, { "epoch": 1.0425208807896735, "grad_norm": 0.15399478375911713, "learning_rate": 1.571848115590702e-05, "loss": 0.4976, "step": 4119 }, { "epoch": 1.0427739812705643, "grad_norm": 0.16309621930122375, "learning_rate": 1.571651678545461e-05, "loss": 0.4951, "step": 4120 }, { "epoch": 1.0430270817514553, "grad_norm": 0.15290434658527374, "learning_rate": 1.571455208728642e-05, "loss": 0.5291, "step": 4121 }, { "epoch": 1.0432801822323463, "grad_norm": 0.1464855670928955, "learning_rate": 1.571258706151508e-05, "loss": 0.5197, "step": 4122 }, { "epoch": 1.043533282713237, "grad_norm": 0.15115946531295776, "learning_rate": 1.571062170825324e-05, "loss": 0.5211, "step": 4123 }, { "epoch": 1.043786383194128, "grad_norm": 0.14781180024147034, "learning_rate": 1.570865602761357e-05, "loss": 0.5031, "step": 4124 }, { "epoch": 1.044039483675019, "grad_norm": 0.1711064726114273, "learning_rate": 1.5706690019708762e-05, "loss": 0.4723, "step": 4125 }, { "epoch": 1.0442925841559099, "grad_norm": 0.14267325401306152, "learning_rate": 1.570472368465152e-05, "loss": 0.5294, "step": 4126 }, { "epoch": 1.0445456846368009, "grad_norm": 0.14643965661525726, "learning_rate": 1.570275702255457e-05, "loss": 0.5124, "step": 4127 }, { "epoch": 1.0447987851176916, "grad_norm": 0.15579791367053986, "learning_rate": 1.5700790033530656e-05, "loss": 0.5058, "step": 4128 }, { "epoch": 1.0450518855985826, "grad_norm": 0.15884539484977722, "learning_rate": 1.5698822717692544e-05, "loss": 0.5063, "step": 4129 }, { "epoch": 1.0453049860794736, "grad_norm": 0.15211458504199982, "learning_rate": 1.5696855075153014e-05, "loss": 0.512, "step": 4130 }, { "epoch": 1.0455580865603644, "grad_norm": 0.1490122228860855, "learning_rate": 1.5694887106024868e-05, "loss": 0.5073, "step": 4131 }, { "epoch": 1.0458111870412554, "grad_norm": 0.14976119995117188, "learning_rate": 1.569291881042092e-05, "loss": 0.5067, "step": 4132 }, { "epoch": 1.0460642875221462, "grad_norm": 0.15558256208896637, "learning_rate": 1.569095018845402e-05, "loss": 0.5092, "step": 4133 }, { "epoch": 1.0463173880030372, "grad_norm": 0.15668827295303345, "learning_rate": 1.5688981240237012e-05, "loss": 0.5316, "step": 4134 }, { "epoch": 1.0465704884839282, "grad_norm": 0.14536906778812408, "learning_rate": 1.5687011965882777e-05, "loss": 0.4913, "step": 4135 }, { "epoch": 1.046823588964819, "grad_norm": 0.14927713572978973, "learning_rate": 1.568504236550421e-05, "loss": 0.5021, "step": 4136 }, { "epoch": 1.04707668944571, "grad_norm": 0.15169309079647064, "learning_rate": 1.5683072439214226e-05, "loss": 0.4932, "step": 4137 }, { "epoch": 1.047329789926601, "grad_norm": 0.14886653423309326, "learning_rate": 1.5681102187125753e-05, "loss": 0.4947, "step": 4138 }, { "epoch": 1.0475828904074918, "grad_norm": 0.14623379707336426, "learning_rate": 1.567913160935174e-05, "loss": 0.5032, "step": 4139 }, { "epoch": 1.0478359908883828, "grad_norm": 0.3044935464859009, "learning_rate": 1.567716070600516e-05, "loss": 0.5166, "step": 4140 }, { "epoch": 1.0480890913692735, "grad_norm": 0.14737936854362488, "learning_rate": 1.5675189477199e-05, "loss": 0.5129, "step": 4141 }, { "epoch": 1.0483421918501645, "grad_norm": 0.24766947329044342, "learning_rate": 1.5673217923046268e-05, "loss": 0.5123, "step": 4142 }, { "epoch": 1.0485952923310555, "grad_norm": 0.19555969536304474, "learning_rate": 1.5671246043659984e-05, "loss": 0.4867, "step": 4143 }, { "epoch": 1.0488483928119463, "grad_norm": 0.15102633833885193, "learning_rate": 1.5669273839153195e-05, "loss": 0.4916, "step": 4144 }, { "epoch": 1.0491014932928373, "grad_norm": 0.15004859864711761, "learning_rate": 1.5667301309638966e-05, "loss": 0.4914, "step": 4145 }, { "epoch": 1.049354593773728, "grad_norm": 0.1494174599647522, "learning_rate": 1.566532845523037e-05, "loss": 0.5327, "step": 4146 }, { "epoch": 1.049607694254619, "grad_norm": 0.1474965214729309, "learning_rate": 1.5663355276040513e-05, "loss": 0.5044, "step": 4147 }, { "epoch": 1.04986079473551, "grad_norm": 0.14412888884544373, "learning_rate": 1.566138177218251e-05, "loss": 0.497, "step": 4148 }, { "epoch": 1.0501138952164009, "grad_norm": 0.14631447196006775, "learning_rate": 1.56594079437695e-05, "loss": 0.4975, "step": 4149 }, { "epoch": 1.0503669956972919, "grad_norm": 0.14624115824699402, "learning_rate": 1.565743379091464e-05, "loss": 0.5165, "step": 4150 }, { "epoch": 1.0506200961781826, "grad_norm": 0.1437530368566513, "learning_rate": 1.5655459313731097e-05, "loss": 0.4536, "step": 4151 }, { "epoch": 1.0508731966590736, "grad_norm": 0.14683012664318085, "learning_rate": 1.565348451233207e-05, "loss": 0.5134, "step": 4152 }, { "epoch": 1.0511262971399646, "grad_norm": 0.16513925790786743, "learning_rate": 1.565150938683077e-05, "loss": 0.5214, "step": 4153 }, { "epoch": 1.0513793976208554, "grad_norm": 0.15082743763923645, "learning_rate": 1.5649533937340426e-05, "loss": 0.5073, "step": 4154 }, { "epoch": 1.0516324981017464, "grad_norm": 0.15586961805820465, "learning_rate": 1.564755816397428e-05, "loss": 0.4962, "step": 4155 }, { "epoch": 1.0518855985826372, "grad_norm": 0.15412850677967072, "learning_rate": 1.564558206684561e-05, "loss": 0.5245, "step": 4156 }, { "epoch": 1.0521386990635282, "grad_norm": 0.15349099040031433, "learning_rate": 1.5643605646067693e-05, "loss": 0.4836, "step": 4157 }, { "epoch": 1.0523917995444192, "grad_norm": 0.15539869666099548, "learning_rate": 1.5641628901753836e-05, "loss": 0.4986, "step": 4158 }, { "epoch": 1.05264490002531, "grad_norm": 0.1477925032377243, "learning_rate": 1.5639651834017362e-05, "loss": 0.5026, "step": 4159 }, { "epoch": 1.052898000506201, "grad_norm": 0.14729399979114532, "learning_rate": 1.5637674442971607e-05, "loss": 0.4857, "step": 4160 }, { "epoch": 1.053151100987092, "grad_norm": 0.1509435623884201, "learning_rate": 1.5635696728729942e-05, "loss": 0.5022, "step": 4161 }, { "epoch": 1.0534042014679827, "grad_norm": 0.1498592495918274, "learning_rate": 1.5633718691405736e-05, "loss": 0.5141, "step": 4162 }, { "epoch": 1.0536573019488737, "grad_norm": 0.3813425898551941, "learning_rate": 1.563174033111239e-05, "loss": 0.5108, "step": 4163 }, { "epoch": 1.0539104024297645, "grad_norm": 0.15156088769435883, "learning_rate": 1.5629761647963312e-05, "loss": 0.4961, "step": 4164 }, { "epoch": 1.0541635029106555, "grad_norm": 0.15539757907390594, "learning_rate": 1.5627782642071944e-05, "loss": 0.4902, "step": 4165 }, { "epoch": 1.0544166033915465, "grad_norm": 0.15164312720298767, "learning_rate": 1.5625803313551733e-05, "loss": 0.5055, "step": 4166 }, { "epoch": 1.0546697038724373, "grad_norm": 0.1515582799911499, "learning_rate": 1.5623823662516155e-05, "loss": 0.5054, "step": 4167 }, { "epoch": 1.0549228043533283, "grad_norm": 0.14880234003067017, "learning_rate": 1.562184368907869e-05, "loss": 0.5208, "step": 4168 }, { "epoch": 1.055175904834219, "grad_norm": 0.1762043833732605, "learning_rate": 1.5619863393352856e-05, "loss": 0.5084, "step": 4169 }, { "epoch": 1.05542900531511, "grad_norm": 0.14981429278850555, "learning_rate": 1.5617882775452174e-05, "loss": 0.5073, "step": 4170 }, { "epoch": 1.055682105796001, "grad_norm": 0.14569419622421265, "learning_rate": 1.561590183549019e-05, "loss": 0.4942, "step": 4171 }, { "epoch": 1.0559352062768919, "grad_norm": 0.1440693736076355, "learning_rate": 1.5613920573580467e-05, "loss": 0.5038, "step": 4172 }, { "epoch": 1.0561883067577829, "grad_norm": 0.15029017627239227, "learning_rate": 1.5611938989836583e-05, "loss": 0.5178, "step": 4173 }, { "epoch": 1.0564414072386739, "grad_norm": 0.16121579706668854, "learning_rate": 1.5609957084372146e-05, "loss": 0.4912, "step": 4174 }, { "epoch": 1.0566945077195646, "grad_norm": 0.1529855728149414, "learning_rate": 1.560797485730076e-05, "loss": 0.4947, "step": 4175 }, { "epoch": 1.0569476082004556, "grad_norm": 0.14877949655056, "learning_rate": 1.5605992308736083e-05, "loss": 0.498, "step": 4176 }, { "epoch": 1.0572007086813464, "grad_norm": 0.15548087656497955, "learning_rate": 1.560400943879175e-05, "loss": 0.5125, "step": 4177 }, { "epoch": 1.0574538091622374, "grad_norm": 0.15288685262203217, "learning_rate": 1.5602026247581447e-05, "loss": 0.518, "step": 4178 }, { "epoch": 1.0577069096431284, "grad_norm": 0.15324977040290833, "learning_rate": 1.5600042735218863e-05, "loss": 0.5058, "step": 4179 }, { "epoch": 1.0579600101240192, "grad_norm": 0.1547747403383255, "learning_rate": 1.559805890181771e-05, "loss": 0.4747, "step": 4180 }, { "epoch": 1.0582131106049102, "grad_norm": 0.15408845245838165, "learning_rate": 1.559607474749171e-05, "loss": 0.4881, "step": 4181 }, { "epoch": 1.058466211085801, "grad_norm": 0.15396107733249664, "learning_rate": 1.5594090272354618e-05, "loss": 0.5225, "step": 4182 }, { "epoch": 1.058719311566692, "grad_norm": 0.14158649742603302, "learning_rate": 1.5592105476520197e-05, "loss": 0.4819, "step": 4183 }, { "epoch": 1.058972412047583, "grad_norm": 0.15223930776119232, "learning_rate": 1.559012036010223e-05, "loss": 0.5146, "step": 4184 }, { "epoch": 1.0592255125284737, "grad_norm": 0.15238091349601746, "learning_rate": 1.558813492321452e-05, "loss": 0.4814, "step": 4185 }, { "epoch": 1.0594786130093647, "grad_norm": 0.1528061330318451, "learning_rate": 1.5586149165970892e-05, "loss": 0.4998, "step": 4186 }, { "epoch": 1.0597317134902555, "grad_norm": 0.1513298898935318, "learning_rate": 1.558416308848518e-05, "loss": 0.5011, "step": 4187 }, { "epoch": 1.0599848139711465, "grad_norm": 0.14777547121047974, "learning_rate": 1.5582176690871245e-05, "loss": 0.5023, "step": 4188 }, { "epoch": 1.0602379144520375, "grad_norm": 0.14531627297401428, "learning_rate": 1.5580189973242957e-05, "loss": 0.4965, "step": 4189 }, { "epoch": 1.0604910149329283, "grad_norm": 0.15040677785873413, "learning_rate": 1.557820293571422e-05, "loss": 0.5282, "step": 4190 }, { "epoch": 1.0607441154138193, "grad_norm": 0.15623944997787476, "learning_rate": 1.557621557839894e-05, "loss": 0.4975, "step": 4191 }, { "epoch": 1.0609972158947103, "grad_norm": 0.14993377029895782, "learning_rate": 1.5574227901411048e-05, "loss": 0.4999, "step": 4192 }, { "epoch": 1.061250316375601, "grad_norm": 0.15711429715156555, "learning_rate": 1.5572239904864494e-05, "loss": 0.4988, "step": 4193 }, { "epoch": 1.061503416856492, "grad_norm": 0.149485781788826, "learning_rate": 1.557025158887325e-05, "loss": 0.4793, "step": 4194 }, { "epoch": 1.0617565173373829, "grad_norm": 0.14863301813602448, "learning_rate": 1.5568262953551295e-05, "loss": 0.514, "step": 4195 }, { "epoch": 1.0620096178182739, "grad_norm": 0.15083107352256775, "learning_rate": 1.5566273999012637e-05, "loss": 0.5164, "step": 4196 }, { "epoch": 1.0622627182991649, "grad_norm": 0.1466355174779892, "learning_rate": 1.55642847253713e-05, "loss": 0.49, "step": 4197 }, { "epoch": 1.0625158187800556, "grad_norm": 0.1489817351102829, "learning_rate": 1.556229513274132e-05, "loss": 0.507, "step": 4198 }, { "epoch": 1.0627689192609466, "grad_norm": 0.15065939724445343, "learning_rate": 1.5560305221236764e-05, "loss": 0.5129, "step": 4199 }, { "epoch": 1.0630220197418374, "grad_norm": 0.16333909332752228, "learning_rate": 1.55583149909717e-05, "loss": 0.5197, "step": 4200 }, { "epoch": 1.0632751202227284, "grad_norm": 0.148971825838089, "learning_rate": 1.5556324442060228e-05, "loss": 0.5015, "step": 4201 }, { "epoch": 1.0635282207036194, "grad_norm": 0.15040379762649536, "learning_rate": 1.5554333574616463e-05, "loss": 0.5156, "step": 4202 }, { "epoch": 1.0637813211845102, "grad_norm": 0.15669579803943634, "learning_rate": 1.5552342388754535e-05, "loss": 0.5113, "step": 4203 }, { "epoch": 1.0640344216654012, "grad_norm": 0.1513897031545639, "learning_rate": 1.5550350884588595e-05, "loss": 0.504, "step": 4204 }, { "epoch": 1.0642875221462922, "grad_norm": 0.15204906463623047, "learning_rate": 1.5548359062232818e-05, "loss": 0.494, "step": 4205 }, { "epoch": 1.064540622627183, "grad_norm": 0.1490669995546341, "learning_rate": 1.554636692180138e-05, "loss": 0.4935, "step": 4206 }, { "epoch": 1.064793723108074, "grad_norm": 0.14637012779712677, "learning_rate": 1.5544374463408495e-05, "loss": 0.5125, "step": 4207 }, { "epoch": 1.0650468235889647, "grad_norm": 0.15354983508586884, "learning_rate": 1.554238168716838e-05, "loss": 0.522, "step": 4208 }, { "epoch": 1.0652999240698557, "grad_norm": 0.1484934389591217, "learning_rate": 1.5540388593195284e-05, "loss": 0.4991, "step": 4209 }, { "epoch": 1.0655530245507467, "grad_norm": 0.1773085743188858, "learning_rate": 1.5538395181603457e-05, "loss": 0.5204, "step": 4210 }, { "epoch": 1.0658061250316375, "grad_norm": 0.15090534090995789, "learning_rate": 1.5536401452507188e-05, "loss": 0.5087, "step": 4211 }, { "epoch": 1.0660592255125285, "grad_norm": 0.14725615084171295, "learning_rate": 1.5534407406020765e-05, "loss": 0.5067, "step": 4212 }, { "epoch": 1.0663123259934193, "grad_norm": 0.14357440173625946, "learning_rate": 1.5532413042258504e-05, "loss": 0.4968, "step": 4213 }, { "epoch": 1.0665654264743103, "grad_norm": 0.14990293979644775, "learning_rate": 1.553041836133474e-05, "loss": 0.4853, "step": 4214 }, { "epoch": 1.0668185269552013, "grad_norm": 0.14226917922496796, "learning_rate": 1.552842336336382e-05, "loss": 0.4953, "step": 4215 }, { "epoch": 1.067071627436092, "grad_norm": 0.14654865860939026, "learning_rate": 1.552642804846012e-05, "loss": 0.4982, "step": 4216 }, { "epoch": 1.067324727916983, "grad_norm": 0.15234434604644775, "learning_rate": 1.5524432416738022e-05, "loss": 0.5384, "step": 4217 }, { "epoch": 1.0675778283978739, "grad_norm": 0.15765413641929626, "learning_rate": 1.552243646831193e-05, "loss": 0.5092, "step": 4218 }, { "epoch": 1.0678309288787649, "grad_norm": 0.2098330855369568, "learning_rate": 1.5520440203296273e-05, "loss": 0.5223, "step": 4219 }, { "epoch": 1.0680840293596559, "grad_norm": 0.1499241590499878, "learning_rate": 1.551844362180549e-05, "loss": 0.491, "step": 4220 }, { "epoch": 1.0683371298405466, "grad_norm": 0.1495862901210785, "learning_rate": 1.5516446723954034e-05, "loss": 0.514, "step": 4221 }, { "epoch": 1.0685902303214376, "grad_norm": 0.15917716920375824, "learning_rate": 1.5514449509856394e-05, "loss": 0.5303, "step": 4222 }, { "epoch": 1.0688433308023286, "grad_norm": 0.14702540636062622, "learning_rate": 1.5512451979627058e-05, "loss": 0.5049, "step": 4223 }, { "epoch": 1.0690964312832194, "grad_norm": 0.15018105506896973, "learning_rate": 1.5510454133380548e-05, "loss": 0.4995, "step": 4224 }, { "epoch": 1.0693495317641104, "grad_norm": 0.1564035564661026, "learning_rate": 1.5508455971231385e-05, "loss": 0.4999, "step": 4225 }, { "epoch": 1.0696026322450012, "grad_norm": 0.15000370144844055, "learning_rate": 1.550645749329413e-05, "loss": 0.4999, "step": 4226 }, { "epoch": 1.0698557327258922, "grad_norm": 0.17009828984737396, "learning_rate": 1.550445869968335e-05, "loss": 0.5128, "step": 4227 }, { "epoch": 1.0701088332067832, "grad_norm": 0.16115716099739075, "learning_rate": 1.5502459590513624e-05, "loss": 0.5086, "step": 4228 }, { "epoch": 1.070361933687674, "grad_norm": 0.1617487668991089, "learning_rate": 1.5500460165899564e-05, "loss": 0.5163, "step": 4229 }, { "epoch": 1.070615034168565, "grad_norm": 0.15228186547756195, "learning_rate": 1.549846042595579e-05, "loss": 0.5204, "step": 4230 }, { "epoch": 1.0708681346494557, "grad_norm": 0.1511366218328476, "learning_rate": 1.5496460370796943e-05, "loss": 0.5152, "step": 4231 }, { "epoch": 1.0711212351303467, "grad_norm": 0.15335188806056976, "learning_rate": 1.5494460000537682e-05, "loss": 0.53, "step": 4232 }, { "epoch": 1.0713743356112377, "grad_norm": 0.15021751821041107, "learning_rate": 1.5492459315292686e-05, "loss": 0.5097, "step": 4233 }, { "epoch": 1.0716274360921285, "grad_norm": 0.1655190885066986, "learning_rate": 1.549045831517665e-05, "loss": 0.4939, "step": 4234 }, { "epoch": 1.0718805365730195, "grad_norm": 0.1489643007516861, "learning_rate": 1.5488457000304283e-05, "loss": 0.4814, "step": 4235 }, { "epoch": 1.0721336370539105, "grad_norm": 0.14702825248241425, "learning_rate": 1.5486455370790318e-05, "loss": 0.5096, "step": 4236 }, { "epoch": 1.0723867375348013, "grad_norm": 0.15439437329769135, "learning_rate": 1.5484453426749506e-05, "loss": 0.5044, "step": 4237 }, { "epoch": 1.0726398380156923, "grad_norm": 0.15240204334259033, "learning_rate": 1.548245116829661e-05, "loss": 0.5148, "step": 4238 }, { "epoch": 1.072892938496583, "grad_norm": 0.1802472472190857, "learning_rate": 1.548044859554642e-05, "loss": 0.5088, "step": 4239 }, { "epoch": 1.073146038977474, "grad_norm": 0.14922311902046204, "learning_rate": 1.5478445708613737e-05, "loss": 0.5107, "step": 4240 }, { "epoch": 1.073399139458365, "grad_norm": 0.148959681391716, "learning_rate": 1.5476442507613386e-05, "loss": 0.5139, "step": 4241 }, { "epoch": 1.0736522399392558, "grad_norm": 0.16022267937660217, "learning_rate": 1.5474438992660197e-05, "loss": 0.5041, "step": 4242 }, { "epoch": 1.0739053404201468, "grad_norm": 0.15084108710289001, "learning_rate": 1.5472435163869034e-05, "loss": 0.4864, "step": 4243 }, { "epoch": 1.0741584409010376, "grad_norm": 0.1523846834897995, "learning_rate": 1.5470431021354776e-05, "loss": 0.522, "step": 4244 }, { "epoch": 1.0744115413819286, "grad_norm": 0.14584098756313324, "learning_rate": 1.546842656523231e-05, "loss": 0.5045, "step": 4245 }, { "epoch": 1.0746646418628196, "grad_norm": 0.15042084455490112, "learning_rate": 1.5466421795616545e-05, "loss": 0.5355, "step": 4246 }, { "epoch": 1.0749177423437104, "grad_norm": 0.15214994549751282, "learning_rate": 1.546441671262242e-05, "loss": 0.492, "step": 4247 }, { "epoch": 1.0751708428246014, "grad_norm": 0.1525678187608719, "learning_rate": 1.5462411316364874e-05, "loss": 0.5294, "step": 4248 }, { "epoch": 1.0754239433054922, "grad_norm": 0.14767494797706604, "learning_rate": 1.5460405606958872e-05, "loss": 0.5035, "step": 4249 }, { "epoch": 1.0756770437863832, "grad_norm": 0.15366555750370026, "learning_rate": 1.54583995845194e-05, "loss": 0.4867, "step": 4250 }, { "epoch": 1.0759301442672742, "grad_norm": 0.18078544735908508, "learning_rate": 1.545639324916146e-05, "loss": 0.516, "step": 4251 }, { "epoch": 1.076183244748165, "grad_norm": 0.14964306354522705, "learning_rate": 1.5454386601000072e-05, "loss": 0.5056, "step": 4252 }, { "epoch": 1.076436345229056, "grad_norm": 0.1520017832517624, "learning_rate": 1.5452379640150267e-05, "loss": 0.5001, "step": 4253 }, { "epoch": 1.0766894457099467, "grad_norm": 0.15898790955543518, "learning_rate": 1.5450372366727103e-05, "loss": 0.5068, "step": 4254 }, { "epoch": 1.0769425461908377, "grad_norm": 0.1687747985124588, "learning_rate": 1.544836478084565e-05, "loss": 0.4794, "step": 4255 }, { "epoch": 1.0771956466717287, "grad_norm": 0.15834957361221313, "learning_rate": 1.5446356882621007e-05, "loss": 0.503, "step": 4256 }, { "epoch": 1.0774487471526195, "grad_norm": 0.1469801813364029, "learning_rate": 1.5444348672168272e-05, "loss": 0.4859, "step": 4257 }, { "epoch": 1.0777018476335105, "grad_norm": 0.40214523673057556, "learning_rate": 1.544234014960258e-05, "loss": 0.5004, "step": 4258 }, { "epoch": 1.0779549481144015, "grad_norm": 0.15620329976081848, "learning_rate": 1.544033131503907e-05, "loss": 0.5084, "step": 4259 }, { "epoch": 1.0782080485952923, "grad_norm": 0.1480400413274765, "learning_rate": 1.5438322168592907e-05, "loss": 0.5047, "step": 4260 }, { "epoch": 1.0784611490761833, "grad_norm": 0.15529435873031616, "learning_rate": 1.5436312710379268e-05, "loss": 0.5361, "step": 4261 }, { "epoch": 1.078714249557074, "grad_norm": 0.1508270651102066, "learning_rate": 1.5434302940513358e-05, "loss": 0.483, "step": 4262 }, { "epoch": 1.078967350037965, "grad_norm": 0.1488027721643448, "learning_rate": 1.5432292859110383e-05, "loss": 0.5261, "step": 4263 }, { "epoch": 1.079220450518856, "grad_norm": 0.14651761949062347, "learning_rate": 1.5430282466285584e-05, "loss": 0.4913, "step": 4264 }, { "epoch": 1.0794735509997468, "grad_norm": 0.15578430891036987, "learning_rate": 1.5428271762154208e-05, "loss": 0.4734, "step": 4265 }, { "epoch": 1.0797266514806378, "grad_norm": 0.1679522544145584, "learning_rate": 1.542626074683153e-05, "loss": 0.5009, "step": 4266 }, { "epoch": 1.0799797519615288, "grad_norm": 0.15412485599517822, "learning_rate": 1.542424942043283e-05, "loss": 0.5141, "step": 4267 }, { "epoch": 1.0802328524424196, "grad_norm": 0.15280230343341827, "learning_rate": 1.5422237783073416e-05, "loss": 0.4961, "step": 4268 }, { "epoch": 1.0804859529233106, "grad_norm": 0.1616753488779068, "learning_rate": 1.542022583486862e-05, "loss": 0.5121, "step": 4269 }, { "epoch": 1.0807390534042014, "grad_norm": 0.15002407133579254, "learning_rate": 1.5418213575933764e-05, "loss": 0.4974, "step": 4270 }, { "epoch": 1.0809921538850924, "grad_norm": 0.15114815533161163, "learning_rate": 1.5416201006384226e-05, "loss": 0.5079, "step": 4271 }, { "epoch": 1.0812452543659834, "grad_norm": 0.1660788357257843, "learning_rate": 1.5414188126335367e-05, "loss": 0.4983, "step": 4272 }, { "epoch": 1.0814983548468742, "grad_norm": 0.15587447583675385, "learning_rate": 1.541217493590259e-05, "loss": 0.496, "step": 4273 }, { "epoch": 1.0817514553277652, "grad_norm": 0.15363775193691254, "learning_rate": 1.5410161435201304e-05, "loss": 0.5068, "step": 4274 }, { "epoch": 1.082004555808656, "grad_norm": 0.1492217630147934, "learning_rate": 1.540814762434694e-05, "loss": 0.4877, "step": 4275 }, { "epoch": 1.082257656289547, "grad_norm": 0.1498951017856598, "learning_rate": 1.5406133503454945e-05, "loss": 0.4899, "step": 4276 }, { "epoch": 1.082510756770438, "grad_norm": 0.15326589345932007, "learning_rate": 1.5404119072640786e-05, "loss": 0.4973, "step": 4277 }, { "epoch": 1.0827638572513287, "grad_norm": 0.1507866531610489, "learning_rate": 1.5402104332019934e-05, "loss": 0.516, "step": 4278 }, { "epoch": 1.0830169577322197, "grad_norm": 0.15374507009983063, "learning_rate": 1.540008928170791e-05, "loss": 0.505, "step": 4279 }, { "epoch": 1.0832700582131105, "grad_norm": 0.15549008548259735, "learning_rate": 1.5398073921820224e-05, "loss": 0.5045, "step": 4280 }, { "epoch": 1.0835231586940015, "grad_norm": 0.15075495839118958, "learning_rate": 1.5396058252472402e-05, "loss": 0.481, "step": 4281 }, { "epoch": 1.0837762591748925, "grad_norm": 0.14552341401576996, "learning_rate": 1.539404227378001e-05, "loss": 0.4654, "step": 4282 }, { "epoch": 1.0840293596557833, "grad_norm": 0.14752675592899323, "learning_rate": 1.5392025985858618e-05, "loss": 0.4939, "step": 4283 }, { "epoch": 1.0842824601366743, "grad_norm": 0.15652896463871002, "learning_rate": 1.539000938882381e-05, "loss": 0.4998, "step": 4284 }, { "epoch": 1.084535560617565, "grad_norm": 0.147396981716156, "learning_rate": 1.5387992482791206e-05, "loss": 0.4909, "step": 4285 }, { "epoch": 1.084788661098456, "grad_norm": 0.1596512347459793, "learning_rate": 1.5385975267876416e-05, "loss": 0.497, "step": 4286 }, { "epoch": 1.085041761579347, "grad_norm": 0.1776268184185028, "learning_rate": 1.538395774419509e-05, "loss": 0.5088, "step": 4287 }, { "epoch": 1.0852948620602378, "grad_norm": 0.15525583922863007, "learning_rate": 1.5381939911862883e-05, "loss": 0.5172, "step": 4288 }, { "epoch": 1.0855479625411288, "grad_norm": 0.1514233946800232, "learning_rate": 1.5379921770995486e-05, "loss": 0.517, "step": 4289 }, { "epoch": 1.0858010630220198, "grad_norm": 0.15112116932868958, "learning_rate": 1.5377903321708577e-05, "loss": 0.4903, "step": 4290 }, { "epoch": 1.0860541635029106, "grad_norm": 0.15315553545951843, "learning_rate": 1.5375884564117884e-05, "loss": 0.4987, "step": 4291 }, { "epoch": 1.0863072639838016, "grad_norm": 0.1523180902004242, "learning_rate": 1.5373865498339132e-05, "loss": 0.5161, "step": 4292 }, { "epoch": 1.0865603644646924, "grad_norm": 0.15014992654323578, "learning_rate": 1.5371846124488066e-05, "loss": 0.5093, "step": 4293 }, { "epoch": 1.0868134649455834, "grad_norm": 0.15175999701023102, "learning_rate": 1.5369826442680463e-05, "loss": 0.4979, "step": 4294 }, { "epoch": 1.0870665654264744, "grad_norm": 0.15095563232898712, "learning_rate": 1.5367806453032098e-05, "loss": 0.5173, "step": 4295 }, { "epoch": 1.0873196659073652, "grad_norm": 0.19802963733673096, "learning_rate": 1.5365786155658777e-05, "loss": 0.5347, "step": 4296 }, { "epoch": 1.0875727663882562, "grad_norm": 0.15580947697162628, "learning_rate": 1.5363765550676313e-05, "loss": 0.5047, "step": 4297 }, { "epoch": 1.087825866869147, "grad_norm": 0.1568150371313095, "learning_rate": 1.536174463820055e-05, "loss": 0.5182, "step": 4298 }, { "epoch": 1.088078967350038, "grad_norm": 0.15076953172683716, "learning_rate": 1.5359723418347344e-05, "loss": 0.4904, "step": 4299 }, { "epoch": 1.088332067830929, "grad_norm": 0.1487162709236145, "learning_rate": 1.5357701891232556e-05, "loss": 0.4652, "step": 4300 }, { "epoch": 1.0885851683118197, "grad_norm": 0.14819073677062988, "learning_rate": 1.535568005697209e-05, "loss": 0.5061, "step": 4301 }, { "epoch": 1.0888382687927107, "grad_norm": 0.15536732971668243, "learning_rate": 1.5353657915681848e-05, "loss": 0.5064, "step": 4302 }, { "epoch": 1.0890913692736017, "grad_norm": 0.15237945318222046, "learning_rate": 1.535163546747775e-05, "loss": 0.5029, "step": 4303 }, { "epoch": 1.0893444697544925, "grad_norm": 0.15036161243915558, "learning_rate": 1.5349612712475742e-05, "loss": 0.498, "step": 4304 }, { "epoch": 1.0895975702353835, "grad_norm": 0.1635569930076599, "learning_rate": 1.5347589650791793e-05, "loss": 0.4945, "step": 4305 }, { "epoch": 1.0898506707162743, "grad_norm": 0.1493002325296402, "learning_rate": 1.5345566282541864e-05, "loss": 0.4983, "step": 4306 }, { "epoch": 1.0901037711971653, "grad_norm": 0.1515049785375595, "learning_rate": 1.5343542607841967e-05, "loss": 0.5172, "step": 4307 }, { "epoch": 1.0903568716780563, "grad_norm": 0.15353552997112274, "learning_rate": 1.53415186268081e-05, "loss": 0.5095, "step": 4308 }, { "epoch": 1.090609972158947, "grad_norm": 0.1567031741142273, "learning_rate": 1.5339494339556306e-05, "loss": 0.4993, "step": 4309 }, { "epoch": 1.090863072639838, "grad_norm": 0.1488611251115799, "learning_rate": 1.533746974620262e-05, "loss": 0.4791, "step": 4310 }, { "epoch": 1.0911161731207288, "grad_norm": 0.14791440963745117, "learning_rate": 1.5335444846863127e-05, "loss": 0.4731, "step": 4311 }, { "epoch": 1.0913692736016198, "grad_norm": 0.15785950422286987, "learning_rate": 1.533341964165389e-05, "loss": 0.4961, "step": 4312 }, { "epoch": 1.0916223740825108, "grad_norm": 0.14618073403835297, "learning_rate": 1.533139413069102e-05, "loss": 0.5051, "step": 4313 }, { "epoch": 1.0918754745634016, "grad_norm": 0.15635164082050323, "learning_rate": 1.5329368314090637e-05, "loss": 0.548, "step": 4314 }, { "epoch": 1.0921285750442926, "grad_norm": 0.16465643048286438, "learning_rate": 1.532734219196887e-05, "loss": 0.5079, "step": 4315 }, { "epoch": 1.0923816755251834, "grad_norm": 0.14684076607227325, "learning_rate": 1.5325315764441876e-05, "loss": 0.4775, "step": 4316 }, { "epoch": 1.0926347760060744, "grad_norm": 0.15312586724758148, "learning_rate": 1.5323289031625828e-05, "loss": 0.5071, "step": 4317 }, { "epoch": 1.0928878764869654, "grad_norm": 0.1545277237892151, "learning_rate": 1.532126199363691e-05, "loss": 0.5069, "step": 4318 }, { "epoch": 1.0931409769678562, "grad_norm": 0.15354938805103302, "learning_rate": 1.531923465059133e-05, "loss": 0.503, "step": 4319 }, { "epoch": 1.0933940774487472, "grad_norm": 0.1457854062318802, "learning_rate": 1.531720700260531e-05, "loss": 0.5032, "step": 4320 }, { "epoch": 1.0936471779296382, "grad_norm": 0.15186402201652527, "learning_rate": 1.5315179049795093e-05, "loss": 0.5045, "step": 4321 }, { "epoch": 1.093900278410529, "grad_norm": 0.1609475314617157, "learning_rate": 1.5313150792276934e-05, "loss": 0.4849, "step": 4322 }, { "epoch": 1.09415337889142, "grad_norm": 0.14813046157360077, "learning_rate": 1.5311122230167112e-05, "loss": 0.5016, "step": 4323 }, { "epoch": 1.0944064793723107, "grad_norm": 0.15004929900169373, "learning_rate": 1.530909336358192e-05, "loss": 0.4753, "step": 4324 }, { "epoch": 1.0946595798532017, "grad_norm": 0.15605764091014862, "learning_rate": 1.5307064192637663e-05, "loss": 0.5047, "step": 4325 }, { "epoch": 1.0949126803340927, "grad_norm": 0.15918521583080292, "learning_rate": 1.530503471745068e-05, "loss": 0.5028, "step": 4326 }, { "epoch": 1.0951657808149835, "grad_norm": 0.14826957881450653, "learning_rate": 1.5303004938137304e-05, "loss": 0.4891, "step": 4327 }, { "epoch": 1.0954188812958745, "grad_norm": 0.14997729659080505, "learning_rate": 1.5300974854813907e-05, "loss": 0.4905, "step": 4328 }, { "epoch": 1.0956719817767653, "grad_norm": 0.14951911568641663, "learning_rate": 1.5298944467596865e-05, "loss": 0.4881, "step": 4329 }, { "epoch": 1.0959250822576563, "grad_norm": 0.15354228019714355, "learning_rate": 1.529691377660258e-05, "loss": 0.5004, "step": 4330 }, { "epoch": 1.0961781827385473, "grad_norm": 0.1564033478498459, "learning_rate": 1.529488278194746e-05, "loss": 0.5129, "step": 4331 }, { "epoch": 1.096431283219438, "grad_norm": 0.16254372894763947, "learning_rate": 1.5292851483747943e-05, "loss": 0.5024, "step": 4332 }, { "epoch": 1.096684383700329, "grad_norm": 0.1498011350631714, "learning_rate": 1.529081988212048e-05, "loss": 0.477, "step": 4333 }, { "epoch": 1.09693748418122, "grad_norm": 0.14722029864788055, "learning_rate": 1.528878797718153e-05, "loss": 0.5157, "step": 4334 }, { "epoch": 1.0971905846621108, "grad_norm": 0.15843774378299713, "learning_rate": 1.528675576904759e-05, "loss": 0.5063, "step": 4335 }, { "epoch": 1.0974436851430018, "grad_norm": 0.15113842487335205, "learning_rate": 1.5284723257835158e-05, "loss": 0.4957, "step": 4336 }, { "epoch": 1.0976967856238926, "grad_norm": 0.1530185043811798, "learning_rate": 1.5282690443660748e-05, "loss": 0.5106, "step": 4337 }, { "epoch": 1.0979498861047836, "grad_norm": 0.1548159122467041, "learning_rate": 1.52806573266409e-05, "loss": 0.4995, "step": 4338 }, { "epoch": 1.0982029865856746, "grad_norm": 0.1565110832452774, "learning_rate": 1.527862390689217e-05, "loss": 0.5086, "step": 4339 }, { "epoch": 1.0984560870665654, "grad_norm": 0.15071925520896912, "learning_rate": 1.527659018453113e-05, "loss": 0.534, "step": 4340 }, { "epoch": 1.0987091875474564, "grad_norm": 0.1554129421710968, "learning_rate": 1.5274556159674365e-05, "loss": 0.5075, "step": 4341 }, { "epoch": 1.0989622880283472, "grad_norm": 0.15870924293994904, "learning_rate": 1.5272521832438486e-05, "loss": 0.5141, "step": 4342 }, { "epoch": 1.0992153885092382, "grad_norm": 0.1504451334476471, "learning_rate": 1.5270487202940116e-05, "loss": 0.5084, "step": 4343 }, { "epoch": 1.0994684889901292, "grad_norm": 0.15166409313678741, "learning_rate": 1.526845227129589e-05, "loss": 0.5115, "step": 4344 }, { "epoch": 1.09972158947102, "grad_norm": 0.17238469421863556, "learning_rate": 1.5266417037622476e-05, "loss": 0.5015, "step": 4345 }, { "epoch": 1.099974689951911, "grad_norm": 0.152670755982399, "learning_rate": 1.5264381502036543e-05, "loss": 0.5143, "step": 4346 }, { "epoch": 1.1002277904328017, "grad_norm": 0.1542736291885376, "learning_rate": 1.5262345664654784e-05, "loss": 0.5035, "step": 4347 }, { "epoch": 1.1004808909136927, "grad_norm": 0.15105880796909332, "learning_rate": 1.5260309525593913e-05, "loss": 0.4996, "step": 4348 }, { "epoch": 1.1007339913945837, "grad_norm": 0.15981851518154144, "learning_rate": 1.5258273084970656e-05, "loss": 0.4861, "step": 4349 }, { "epoch": 1.1009870918754745, "grad_norm": 0.16696465015411377, "learning_rate": 1.5256236342901756e-05, "loss": 0.5225, "step": 4350 }, { "epoch": 1.1012401923563655, "grad_norm": 0.15144138038158417, "learning_rate": 1.5254199299503972e-05, "loss": 0.516, "step": 4351 }, { "epoch": 1.1014932928372563, "grad_norm": 0.15419089794158936, "learning_rate": 1.5252161954894093e-05, "loss": 0.5262, "step": 4352 }, { "epoch": 1.1017463933181473, "grad_norm": 0.1484547257423401, "learning_rate": 1.5250124309188907e-05, "loss": 0.5294, "step": 4353 }, { "epoch": 1.1019994937990383, "grad_norm": 0.14958380162715912, "learning_rate": 1.5248086362505231e-05, "loss": 0.4983, "step": 4354 }, { "epoch": 1.102252594279929, "grad_norm": 0.1459003984928131, "learning_rate": 1.5246048114959902e-05, "loss": 0.4769, "step": 4355 }, { "epoch": 1.10250569476082, "grad_norm": 0.15102186799049377, "learning_rate": 1.5244009566669758e-05, "loss": 0.4901, "step": 4356 }, { "epoch": 1.102758795241711, "grad_norm": 0.15082873404026031, "learning_rate": 1.524197071775167e-05, "loss": 0.4968, "step": 4357 }, { "epoch": 1.1030118957226018, "grad_norm": 0.15037629008293152, "learning_rate": 1.5239931568322519e-05, "loss": 0.4893, "step": 4358 }, { "epoch": 1.1032649962034928, "grad_norm": 0.15410080552101135, "learning_rate": 1.5237892118499206e-05, "loss": 0.4954, "step": 4359 }, { "epoch": 1.1035180966843836, "grad_norm": 0.14837878942489624, "learning_rate": 1.5235852368398652e-05, "loss": 0.4998, "step": 4360 }, { "epoch": 1.1037711971652746, "grad_norm": 0.14902861416339874, "learning_rate": 1.5233812318137787e-05, "loss": 0.4868, "step": 4361 }, { "epoch": 1.1040242976461656, "grad_norm": 0.14886997640132904, "learning_rate": 1.5231771967833562e-05, "loss": 0.4806, "step": 4362 }, { "epoch": 1.1042773981270564, "grad_norm": 0.18965311348438263, "learning_rate": 1.522973131760295e-05, "loss": 0.5058, "step": 4363 }, { "epoch": 1.1045304986079474, "grad_norm": 0.15019407868385315, "learning_rate": 1.5227690367562935e-05, "loss": 0.4876, "step": 4364 }, { "epoch": 1.1047835990888384, "grad_norm": 0.15315602719783783, "learning_rate": 1.5225649117830519e-05, "loss": 0.5171, "step": 4365 }, { "epoch": 1.1050366995697292, "grad_norm": 0.17048317193984985, "learning_rate": 1.5223607568522724e-05, "loss": 0.5058, "step": 4366 }, { "epoch": 1.1052898000506202, "grad_norm": 0.15379580855369568, "learning_rate": 1.5221565719756588e-05, "loss": 0.5254, "step": 4367 }, { "epoch": 1.105542900531511, "grad_norm": 0.15727490186691284, "learning_rate": 1.5219523571649168e-05, "loss": 0.5023, "step": 4368 }, { "epoch": 1.105796001012402, "grad_norm": 0.15291915833950043, "learning_rate": 1.521748112431753e-05, "loss": 0.5051, "step": 4369 }, { "epoch": 1.106049101493293, "grad_norm": 0.1533598154783249, "learning_rate": 1.5215438377878768e-05, "loss": 0.4871, "step": 4370 }, { "epoch": 1.1063022019741837, "grad_norm": 0.1585204005241394, "learning_rate": 1.5213395332449987e-05, "loss": 0.5131, "step": 4371 }, { "epoch": 1.1065553024550747, "grad_norm": 0.15738743543624878, "learning_rate": 1.521135198814831e-05, "loss": 0.507, "step": 4372 }, { "epoch": 1.1068084029359655, "grad_norm": 0.1535847783088684, "learning_rate": 1.520930834509088e-05, "loss": 0.5036, "step": 4373 }, { "epoch": 1.1070615034168565, "grad_norm": 0.15075922012329102, "learning_rate": 1.5207264403394853e-05, "loss": 0.5074, "step": 4374 }, { "epoch": 1.1073146038977475, "grad_norm": 0.14967545866966248, "learning_rate": 1.5205220163177402e-05, "loss": 0.4965, "step": 4375 }, { "epoch": 1.1075677043786383, "grad_norm": 0.15206517279148102, "learning_rate": 1.5203175624555717e-05, "loss": 0.4899, "step": 4376 }, { "epoch": 1.1078208048595293, "grad_norm": 0.1518884301185608, "learning_rate": 1.5201130787647018e-05, "loss": 0.4966, "step": 4377 }, { "epoch": 1.10807390534042, "grad_norm": 0.164960578083992, "learning_rate": 1.519908565256852e-05, "loss": 0.4888, "step": 4378 }, { "epoch": 1.108327005821311, "grad_norm": 0.15350238978862762, "learning_rate": 1.5197040219437466e-05, "loss": 0.506, "step": 4379 }, { "epoch": 1.108580106302202, "grad_norm": 0.1600707322359085, "learning_rate": 1.5194994488371126e-05, "loss": 0.4974, "step": 4380 }, { "epoch": 1.1088332067830928, "grad_norm": 0.15787430107593536, "learning_rate": 1.519294845948677e-05, "loss": 0.5159, "step": 4381 }, { "epoch": 1.1090863072639838, "grad_norm": 0.15059372782707214, "learning_rate": 1.5190902132901696e-05, "loss": 0.5019, "step": 4382 }, { "epoch": 1.1093394077448746, "grad_norm": 0.14538879692554474, "learning_rate": 1.5188855508733214e-05, "loss": 0.518, "step": 4383 }, { "epoch": 1.1095925082257656, "grad_norm": 0.14909009635448456, "learning_rate": 1.518680858709865e-05, "loss": 0.4912, "step": 4384 }, { "epoch": 1.1098456087066566, "grad_norm": 0.15148216485977173, "learning_rate": 1.5184761368115353e-05, "loss": 0.4927, "step": 4385 }, { "epoch": 1.1100987091875474, "grad_norm": 0.16010677814483643, "learning_rate": 1.5182713851900686e-05, "loss": 0.5066, "step": 4386 }, { "epoch": 1.1103518096684384, "grad_norm": 0.15163959562778473, "learning_rate": 1.5180666038572028e-05, "loss": 0.5044, "step": 4387 }, { "epoch": 1.1106049101493294, "grad_norm": 0.20646870136260986, "learning_rate": 1.5178617928246773e-05, "loss": 0.4999, "step": 4388 }, { "epoch": 1.1108580106302202, "grad_norm": 0.15154814720153809, "learning_rate": 1.517656952104234e-05, "loss": 0.5102, "step": 4389 }, { "epoch": 1.1111111111111112, "grad_norm": 0.15697988867759705, "learning_rate": 1.5174520817076155e-05, "loss": 0.5031, "step": 4390 }, { "epoch": 1.111364211592002, "grad_norm": 0.15264984965324402, "learning_rate": 1.5172471816465666e-05, "loss": 0.5015, "step": 4391 }, { "epoch": 1.111617312072893, "grad_norm": 0.15309308469295502, "learning_rate": 1.5170422519328343e-05, "loss": 0.459, "step": 4392 }, { "epoch": 1.111870412553784, "grad_norm": 0.15518596768379211, "learning_rate": 1.5168372925781667e-05, "loss": 0.5042, "step": 4393 }, { "epoch": 1.1121235130346747, "grad_norm": 0.15685930848121643, "learning_rate": 1.5166323035943132e-05, "loss": 0.4925, "step": 4394 }, { "epoch": 1.1123766135155657, "grad_norm": 0.14795920252799988, "learning_rate": 1.5164272849930256e-05, "loss": 0.4727, "step": 4395 }, { "epoch": 1.1126297139964565, "grad_norm": 0.1536049097776413, "learning_rate": 1.516222236786057e-05, "loss": 0.5134, "step": 4396 }, { "epoch": 1.1128828144773475, "grad_norm": 0.5776845216751099, "learning_rate": 1.5160171589851632e-05, "loss": 0.4949, "step": 4397 }, { "epoch": 1.1131359149582385, "grad_norm": 0.1548316776752472, "learning_rate": 1.5158120516020995e-05, "loss": 0.524, "step": 4398 }, { "epoch": 1.1133890154391293, "grad_norm": 0.15136609971523285, "learning_rate": 1.5156069146486256e-05, "loss": 0.499, "step": 4399 }, { "epoch": 1.1136421159200203, "grad_norm": 0.15080523490905762, "learning_rate": 1.5154017481365011e-05, "loss": 0.4895, "step": 4400 }, { "epoch": 1.1138952164009113, "grad_norm": 0.15632697939872742, "learning_rate": 1.515196552077487e-05, "loss": 0.4851, "step": 4401 }, { "epoch": 1.114148316881802, "grad_norm": 0.17491662502288818, "learning_rate": 1.5149913264833482e-05, "loss": 0.5017, "step": 4402 }, { "epoch": 1.114401417362693, "grad_norm": 0.1556183248758316, "learning_rate": 1.5147860713658487e-05, "loss": 0.4965, "step": 4403 }, { "epoch": 1.1146545178435838, "grad_norm": 0.15949764847755432, "learning_rate": 1.5145807867367556e-05, "loss": 0.4928, "step": 4404 }, { "epoch": 1.1149076183244748, "grad_norm": 0.15081200003623962, "learning_rate": 1.5143754726078377e-05, "loss": 0.4819, "step": 4405 }, { "epoch": 1.1151607188053658, "grad_norm": 0.1473296731710434, "learning_rate": 1.5141701289908653e-05, "loss": 0.4931, "step": 4406 }, { "epoch": 1.1154138192862566, "grad_norm": 0.166677325963974, "learning_rate": 1.5139647558976097e-05, "loss": 0.4976, "step": 4407 }, { "epoch": 1.1156669197671476, "grad_norm": 0.15962107479572296, "learning_rate": 1.513759353339845e-05, "loss": 0.4899, "step": 4408 }, { "epoch": 1.1159200202480384, "grad_norm": 0.15268753468990326, "learning_rate": 1.5135539213293465e-05, "loss": 0.4899, "step": 4409 }, { "epoch": 1.1161731207289294, "grad_norm": 0.14671221375465393, "learning_rate": 1.513348459877891e-05, "loss": 0.4658, "step": 4410 }, { "epoch": 1.1164262212098204, "grad_norm": 0.1552177518606186, "learning_rate": 1.513142968997257e-05, "loss": 0.511, "step": 4411 }, { "epoch": 1.1166793216907112, "grad_norm": 0.1486741602420807, "learning_rate": 1.5129374486992254e-05, "loss": 0.4811, "step": 4412 }, { "epoch": 1.1169324221716022, "grad_norm": 0.15427850186824799, "learning_rate": 1.5127318989955779e-05, "loss": 0.4992, "step": 4413 }, { "epoch": 1.117185522652493, "grad_norm": 0.15394657850265503, "learning_rate": 1.5125263198980981e-05, "loss": 0.4918, "step": 4414 }, { "epoch": 1.117438623133384, "grad_norm": 0.1545231193304062, "learning_rate": 1.5123207114185717e-05, "loss": 0.4867, "step": 4415 }, { "epoch": 1.117691723614275, "grad_norm": 0.1518087387084961, "learning_rate": 1.512115073568786e-05, "loss": 0.487, "step": 4416 }, { "epoch": 1.1179448240951657, "grad_norm": 0.20733951032161713, "learning_rate": 1.5119094063605285e-05, "loss": 0.4964, "step": 4417 }, { "epoch": 1.1181979245760567, "grad_norm": 0.15090887248516083, "learning_rate": 1.5117037098055916e-05, "loss": 0.481, "step": 4418 }, { "epoch": 1.1184510250569477, "grad_norm": 0.15299421548843384, "learning_rate": 1.5114979839157661e-05, "loss": 0.5249, "step": 4419 }, { "epoch": 1.1187041255378385, "grad_norm": 0.15338605642318726, "learning_rate": 1.511292228702846e-05, "loss": 0.5255, "step": 4420 }, { "epoch": 1.1189572260187295, "grad_norm": 0.15356262028217316, "learning_rate": 1.5110864441786274e-05, "loss": 0.5147, "step": 4421 }, { "epoch": 1.1192103264996203, "grad_norm": 0.15609502792358398, "learning_rate": 1.5108806303549068e-05, "loss": 0.4794, "step": 4422 }, { "epoch": 1.1194634269805113, "grad_norm": 0.15665178000926971, "learning_rate": 1.5106747872434833e-05, "loss": 0.4896, "step": 4423 }, { "epoch": 1.1197165274614023, "grad_norm": 0.15239985287189484, "learning_rate": 1.5104689148561577e-05, "loss": 0.4802, "step": 4424 }, { "epoch": 1.119969627942293, "grad_norm": 0.30692631006240845, "learning_rate": 1.5102630132047323e-05, "loss": 0.4997, "step": 4425 }, { "epoch": 1.120222728423184, "grad_norm": 0.19465741515159607, "learning_rate": 1.5100570823010104e-05, "loss": 0.5105, "step": 4426 }, { "epoch": 1.1204758289040748, "grad_norm": 0.1551627367734909, "learning_rate": 1.5098511221567977e-05, "loss": 0.5116, "step": 4427 }, { "epoch": 1.1207289293849658, "grad_norm": 0.15337654948234558, "learning_rate": 1.5096451327839021e-05, "loss": 0.5011, "step": 4428 }, { "epoch": 1.1209820298658568, "grad_norm": 0.15800996124744415, "learning_rate": 1.5094391141941319e-05, "loss": 0.4806, "step": 4429 }, { "epoch": 1.1212351303467476, "grad_norm": 0.16187424957752228, "learning_rate": 1.5092330663992978e-05, "loss": 0.4866, "step": 4430 }, { "epoch": 1.1214882308276386, "grad_norm": 0.14710259437561035, "learning_rate": 1.5090269894112126e-05, "loss": 0.4826, "step": 4431 }, { "epoch": 1.1217413313085296, "grad_norm": 0.19280964136123657, "learning_rate": 1.5088208832416895e-05, "loss": 0.4832, "step": 4432 }, { "epoch": 1.1219944317894204, "grad_norm": 0.1506638377904892, "learning_rate": 1.5086147479025446e-05, "loss": 0.5403, "step": 4433 }, { "epoch": 1.1222475322703114, "grad_norm": 0.15227295458316803, "learning_rate": 1.5084085834055953e-05, "loss": 0.4985, "step": 4434 }, { "epoch": 1.1225006327512022, "grad_norm": 0.15187792479991913, "learning_rate": 1.5082023897626603e-05, "loss": 0.4873, "step": 4435 }, { "epoch": 1.1227537332320932, "grad_norm": 0.14819838106632233, "learning_rate": 1.5079961669855601e-05, "loss": 0.4866, "step": 4436 }, { "epoch": 1.1230068337129842, "grad_norm": 0.15491323173046112, "learning_rate": 1.5077899150861175e-05, "loss": 0.5058, "step": 4437 }, { "epoch": 1.123259934193875, "grad_norm": 0.15460236370563507, "learning_rate": 1.507583634076156e-05, "loss": 0.5048, "step": 4438 }, { "epoch": 1.123513034674766, "grad_norm": 0.15346521139144897, "learning_rate": 1.5073773239675015e-05, "loss": 0.5127, "step": 4439 }, { "epoch": 1.1237661351556567, "grad_norm": 0.16585476696491241, "learning_rate": 1.5071709847719813e-05, "loss": 0.5071, "step": 4440 }, { "epoch": 1.1240192356365477, "grad_norm": 0.1535317301750183, "learning_rate": 1.5069646165014247e-05, "loss": 0.5071, "step": 4441 }, { "epoch": 1.1242723361174387, "grad_norm": 0.20207655429840088, "learning_rate": 1.5067582191676616e-05, "loss": 0.5014, "step": 4442 }, { "epoch": 1.1245254365983295, "grad_norm": 0.14779265224933624, "learning_rate": 1.506551792782525e-05, "loss": 0.5124, "step": 4443 }, { "epoch": 1.1247785370792205, "grad_norm": 0.14889021217823029, "learning_rate": 1.5063453373578485e-05, "loss": 0.4882, "step": 4444 }, { "epoch": 1.1250316375601113, "grad_norm": 0.15384557843208313, "learning_rate": 1.5061388529054677e-05, "loss": 0.5045, "step": 4445 }, { "epoch": 1.1252847380410023, "grad_norm": 0.15044476091861725, "learning_rate": 1.5059323394372207e-05, "loss": 0.5084, "step": 4446 }, { "epoch": 1.1255378385218933, "grad_norm": 0.1536383032798767, "learning_rate": 1.5057257969649455e-05, "loss": 0.5035, "step": 4447 }, { "epoch": 1.125790939002784, "grad_norm": 0.14830613136291504, "learning_rate": 1.5055192255004831e-05, "loss": 0.4989, "step": 4448 }, { "epoch": 1.126044039483675, "grad_norm": 0.14491593837738037, "learning_rate": 1.5053126250556758e-05, "loss": 0.4792, "step": 4449 }, { "epoch": 1.1262971399645658, "grad_norm": 0.16118521988391876, "learning_rate": 1.5051059956423676e-05, "loss": 0.4872, "step": 4450 }, { "epoch": 1.1265502404454568, "grad_norm": 0.16577772796154022, "learning_rate": 1.5048993372724043e-05, "loss": 0.5078, "step": 4451 }, { "epoch": 1.1268033409263478, "grad_norm": 0.16052287817001343, "learning_rate": 1.504692649957633e-05, "loss": 0.4971, "step": 4452 }, { "epoch": 1.1270564414072386, "grad_norm": 0.15107719600200653, "learning_rate": 1.5044859337099027e-05, "loss": 0.4842, "step": 4453 }, { "epoch": 1.1273095418881296, "grad_norm": 0.15358081459999084, "learning_rate": 1.5042791885410638e-05, "loss": 0.4877, "step": 4454 }, { "epoch": 1.1275626423690206, "grad_norm": 0.15170229971408844, "learning_rate": 1.5040724144629685e-05, "loss": 0.5037, "step": 4455 }, { "epoch": 1.1278157428499114, "grad_norm": 0.1541074961423874, "learning_rate": 1.5038656114874712e-05, "loss": 0.4746, "step": 4456 }, { "epoch": 1.1280688433308024, "grad_norm": 0.14938542246818542, "learning_rate": 1.5036587796264275e-05, "loss": 0.5055, "step": 4457 }, { "epoch": 1.1283219438116932, "grad_norm": 0.1542871594429016, "learning_rate": 1.5034519188916938e-05, "loss": 0.4907, "step": 4458 }, { "epoch": 1.1285750442925842, "grad_norm": 0.15429328382015228, "learning_rate": 1.5032450292951297e-05, "loss": 0.5252, "step": 4459 }, { "epoch": 1.1288281447734752, "grad_norm": 0.15635327994823456, "learning_rate": 1.5030381108485958e-05, "loss": 0.5035, "step": 4460 }, { "epoch": 1.129081245254366, "grad_norm": 0.14885006844997406, "learning_rate": 1.502831163563954e-05, "loss": 0.487, "step": 4461 }, { "epoch": 1.129334345735257, "grad_norm": 0.14826525747776031, "learning_rate": 1.502624187453068e-05, "loss": 0.4902, "step": 4462 }, { "epoch": 1.129587446216148, "grad_norm": 0.15496915578842163, "learning_rate": 1.502417182527804e-05, "loss": 0.4927, "step": 4463 }, { "epoch": 1.1298405466970387, "grad_norm": 0.15618129074573517, "learning_rate": 1.502210148800028e-05, "loss": 0.5203, "step": 4464 }, { "epoch": 1.1300936471779297, "grad_norm": 0.15878267586231232, "learning_rate": 1.5020030862816102e-05, "loss": 0.4966, "step": 4465 }, { "epoch": 1.1303467476588205, "grad_norm": 0.15419431030750275, "learning_rate": 1.5017959949844199e-05, "loss": 0.4869, "step": 4466 }, { "epoch": 1.1305998481397115, "grad_norm": 0.15718205273151398, "learning_rate": 1.5015888749203293e-05, "loss": 0.5012, "step": 4467 }, { "epoch": 1.1308529486206025, "grad_norm": 0.15696579217910767, "learning_rate": 1.5013817261012132e-05, "loss": 0.4848, "step": 4468 }, { "epoch": 1.1311060491014933, "grad_norm": 0.1527346819639206, "learning_rate": 1.501174548538946e-05, "loss": 0.488, "step": 4469 }, { "epoch": 1.1313591495823843, "grad_norm": 0.15426510572433472, "learning_rate": 1.500967342245405e-05, "loss": 0.4942, "step": 4470 }, { "epoch": 1.131612250063275, "grad_norm": 0.1598716378211975, "learning_rate": 1.5007601072324689e-05, "loss": 0.492, "step": 4471 }, { "epoch": 1.131865350544166, "grad_norm": 0.15215171873569489, "learning_rate": 1.5005528435120182e-05, "loss": 0.5082, "step": 4472 }, { "epoch": 1.132118451025057, "grad_norm": 0.17876341938972473, "learning_rate": 1.5003455510959345e-05, "loss": 0.4951, "step": 4473 }, { "epoch": 1.1323715515059478, "grad_norm": 0.15258780121803284, "learning_rate": 1.500138229996102e-05, "loss": 0.472, "step": 4474 }, { "epoch": 1.1326246519868388, "grad_norm": 0.14967603981494904, "learning_rate": 1.499930880224406e-05, "loss": 0.4906, "step": 4475 }, { "epoch": 1.1328777524677296, "grad_norm": 0.1517462432384491, "learning_rate": 1.4997235017927325e-05, "loss": 0.5247, "step": 4476 }, { "epoch": 1.1331308529486206, "grad_norm": 0.1466211974620819, "learning_rate": 1.499516094712971e-05, "loss": 0.4889, "step": 4477 }, { "epoch": 1.1333839534295116, "grad_norm": 0.15160037577152252, "learning_rate": 1.4993086589970112e-05, "loss": 0.4965, "step": 4478 }, { "epoch": 1.1336370539104024, "grad_norm": 0.1490289866924286, "learning_rate": 1.4991011946567455e-05, "loss": 0.4967, "step": 4479 }, { "epoch": 1.1338901543912934, "grad_norm": 0.160428985953331, "learning_rate": 1.4988937017040666e-05, "loss": 0.4916, "step": 4480 }, { "epoch": 1.1341432548721841, "grad_norm": 0.16257289052009583, "learning_rate": 1.4986861801508701e-05, "loss": 0.5025, "step": 4481 }, { "epoch": 1.1343963553530751, "grad_norm": 0.15674084424972534, "learning_rate": 1.498478630009053e-05, "loss": 0.5328, "step": 4482 }, { "epoch": 1.1346494558339661, "grad_norm": 0.15497183799743652, "learning_rate": 1.4982710512905132e-05, "loss": 0.4906, "step": 4483 }, { "epoch": 1.134902556314857, "grad_norm": 0.14720937609672546, "learning_rate": 1.498063444007151e-05, "loss": 0.4722, "step": 4484 }, { "epoch": 1.135155656795748, "grad_norm": 0.1544056534767151, "learning_rate": 1.4978558081708685e-05, "loss": 0.4963, "step": 4485 }, { "epoch": 1.1354087572766387, "grad_norm": 0.15654411911964417, "learning_rate": 1.497648143793568e-05, "loss": 0.4841, "step": 4486 }, { "epoch": 1.1356618577575297, "grad_norm": 0.16091877222061157, "learning_rate": 1.4974404508871551e-05, "loss": 0.4868, "step": 4487 }, { "epoch": 1.1359149582384207, "grad_norm": 0.15532781183719635, "learning_rate": 1.4972327294635367e-05, "loss": 0.4777, "step": 4488 }, { "epoch": 1.1361680587193115, "grad_norm": 0.15229521691799164, "learning_rate": 1.4970249795346204e-05, "loss": 0.5358, "step": 4489 }, { "epoch": 1.1364211592002025, "grad_norm": 0.1529165655374527, "learning_rate": 1.4968172011123166e-05, "loss": 0.5036, "step": 4490 }, { "epoch": 1.1366742596810935, "grad_norm": 0.15232688188552856, "learning_rate": 1.4966093942085365e-05, "loss": 0.4804, "step": 4491 }, { "epoch": 1.1369273601619843, "grad_norm": 0.15192368626594543, "learning_rate": 1.496401558835193e-05, "loss": 0.4946, "step": 4492 }, { "epoch": 1.1371804606428753, "grad_norm": 0.15417982637882233, "learning_rate": 1.4961936950042013e-05, "loss": 0.5074, "step": 4493 }, { "epoch": 1.1374335611237663, "grad_norm": 0.1592484414577484, "learning_rate": 1.4959858027274773e-05, "loss": 0.5077, "step": 4494 }, { "epoch": 1.137686661604657, "grad_norm": 0.15914742648601532, "learning_rate": 1.4957778820169398e-05, "loss": 0.5148, "step": 4495 }, { "epoch": 1.137939762085548, "grad_norm": 0.16582271456718445, "learning_rate": 1.4955699328845077e-05, "loss": 0.505, "step": 4496 }, { "epoch": 1.1381928625664388, "grad_norm": 0.16284498572349548, "learning_rate": 1.4953619553421027e-05, "loss": 0.4839, "step": 4497 }, { "epoch": 1.1384459630473298, "grad_norm": 0.19038650393486023, "learning_rate": 1.4951539494016473e-05, "loss": 0.5072, "step": 4498 }, { "epoch": 1.1386990635282208, "grad_norm": 0.14853793382644653, "learning_rate": 1.4949459150750668e-05, "loss": 0.5149, "step": 4499 }, { "epoch": 1.1389521640091116, "grad_norm": 0.14945878088474274, "learning_rate": 1.4947378523742865e-05, "loss": 0.5254, "step": 4500 }, { "epoch": 1.1392052644900026, "grad_norm": 0.1539430320262909, "learning_rate": 1.494529761311235e-05, "loss": 0.512, "step": 4501 }, { "epoch": 1.1394583649708934, "grad_norm": 0.16534796357154846, "learning_rate": 1.4943216418978408e-05, "loss": 0.5027, "step": 4502 }, { "epoch": 1.1397114654517844, "grad_norm": 0.14662806689739227, "learning_rate": 1.4941134941460356e-05, "loss": 0.5052, "step": 4503 }, { "epoch": 1.1399645659326754, "grad_norm": 0.15241102874279022, "learning_rate": 1.4939053180677524e-05, "loss": 0.493, "step": 4504 }, { "epoch": 1.1402176664135661, "grad_norm": 0.15630359947681427, "learning_rate": 1.4936971136749243e-05, "loss": 0.4659, "step": 4505 }, { "epoch": 1.1404707668944571, "grad_norm": 0.151032954454422, "learning_rate": 1.493488880979488e-05, "loss": 0.4871, "step": 4506 }, { "epoch": 1.140723867375348, "grad_norm": 0.15131443738937378, "learning_rate": 1.4932806199933816e-05, "loss": 0.488, "step": 4507 }, { "epoch": 1.140976967856239, "grad_norm": 0.15451858937740326, "learning_rate": 1.4930723307285429e-05, "loss": 0.4758, "step": 4508 }, { "epoch": 1.14123006833713, "grad_norm": 0.16497088968753815, "learning_rate": 1.4928640131969138e-05, "loss": 0.474, "step": 4509 }, { "epoch": 1.1414831688180207, "grad_norm": 0.14798031747341156, "learning_rate": 1.4926556674104364e-05, "loss": 0.4784, "step": 4510 }, { "epoch": 1.1417362692989117, "grad_norm": 0.155374675989151, "learning_rate": 1.4924472933810544e-05, "loss": 0.5051, "step": 4511 }, { "epoch": 1.1419893697798025, "grad_norm": 0.1539786458015442, "learning_rate": 1.4922388911207135e-05, "loss": 0.5004, "step": 4512 }, { "epoch": 1.1422424702606935, "grad_norm": 0.1513451337814331, "learning_rate": 1.4920304606413616e-05, "loss": 0.4941, "step": 4513 }, { "epoch": 1.1424955707415845, "grad_norm": 0.14979660511016846, "learning_rate": 1.4918220019549468e-05, "loss": 0.5034, "step": 4514 }, { "epoch": 1.1427486712224753, "grad_norm": 0.1476915031671524, "learning_rate": 1.49161351507342e-05, "loss": 0.4921, "step": 4515 }, { "epoch": 1.1430017717033663, "grad_norm": 0.15157783031463623, "learning_rate": 1.4914050000087332e-05, "loss": 0.4678, "step": 4516 }, { "epoch": 1.143254872184257, "grad_norm": 0.14807292819023132, "learning_rate": 1.49119645677284e-05, "loss": 0.4634, "step": 4517 }, { "epoch": 1.143507972665148, "grad_norm": 0.16486209630966187, "learning_rate": 1.490987885377696e-05, "loss": 0.5093, "step": 4518 }, { "epoch": 1.143761073146039, "grad_norm": 0.16770464181900024, "learning_rate": 1.4907792858352582e-05, "loss": 0.4763, "step": 4519 }, { "epoch": 1.1440141736269298, "grad_norm": 0.15387621521949768, "learning_rate": 1.4905706581574846e-05, "loss": 0.474, "step": 4520 }, { "epoch": 1.1442672741078208, "grad_norm": 0.1573195457458496, "learning_rate": 1.4903620023563361e-05, "loss": 0.5039, "step": 4521 }, { "epoch": 1.1445203745887118, "grad_norm": 0.18023450672626495, "learning_rate": 1.4901533184437741e-05, "loss": 0.5062, "step": 4522 }, { "epoch": 1.1447734750696026, "grad_norm": 0.15731596946716309, "learning_rate": 1.4899446064317622e-05, "loss": 0.492, "step": 4523 }, { "epoch": 1.1450265755504936, "grad_norm": 0.15252766013145447, "learning_rate": 1.4897358663322652e-05, "loss": 0.4748, "step": 4524 }, { "epoch": 1.1452796760313844, "grad_norm": 0.15619778633117676, "learning_rate": 1.4895270981572499e-05, "loss": 0.5082, "step": 4525 }, { "epoch": 1.1455327765122754, "grad_norm": 0.15318432450294495, "learning_rate": 1.4893183019186843e-05, "loss": 0.5157, "step": 4526 }, { "epoch": 1.1457858769931664, "grad_norm": 0.15334902703762054, "learning_rate": 1.4891094776285383e-05, "loss": 0.4851, "step": 4527 }, { "epoch": 1.1460389774740571, "grad_norm": 0.1571304351091385, "learning_rate": 1.4889006252987839e-05, "loss": 0.5014, "step": 4528 }, { "epoch": 1.1462920779549481, "grad_norm": 0.1513826847076416, "learning_rate": 1.4886917449413936e-05, "loss": 0.5177, "step": 4529 }, { "epoch": 1.1465451784358391, "grad_norm": 0.15262196958065033, "learning_rate": 1.4884828365683419e-05, "loss": 0.5061, "step": 4530 }, { "epoch": 1.14679827891673, "grad_norm": 0.6714463829994202, "learning_rate": 1.4882739001916055e-05, "loss": 0.516, "step": 4531 }, { "epoch": 1.147051379397621, "grad_norm": 0.1528487205505371, "learning_rate": 1.4880649358231625e-05, "loss": 0.5118, "step": 4532 }, { "epoch": 1.1473044798785117, "grad_norm": 0.16101539134979248, "learning_rate": 1.4878559434749917e-05, "loss": 0.5162, "step": 4533 }, { "epoch": 1.1475575803594027, "grad_norm": 0.1466965675354004, "learning_rate": 1.4876469231590745e-05, "loss": 0.4772, "step": 4534 }, { "epoch": 1.1478106808402937, "grad_norm": 0.15363794565200806, "learning_rate": 1.4874378748873939e-05, "loss": 0.4901, "step": 4535 }, { "epoch": 1.1480637813211845, "grad_norm": 0.14833836257457733, "learning_rate": 1.4872287986719335e-05, "loss": 0.4782, "step": 4536 }, { "epoch": 1.1483168818020755, "grad_norm": 0.15358251333236694, "learning_rate": 1.4870196945246798e-05, "loss": 0.5218, "step": 4537 }, { "epoch": 1.1485699822829663, "grad_norm": 0.1516243815422058, "learning_rate": 1.4868105624576203e-05, "loss": 0.5014, "step": 4538 }, { "epoch": 1.1488230827638573, "grad_norm": 0.15382201969623566, "learning_rate": 1.4866014024827435e-05, "loss": 0.4732, "step": 4539 }, { "epoch": 1.1490761832447483, "grad_norm": 0.15255552530288696, "learning_rate": 1.4863922146120409e-05, "loss": 0.4865, "step": 4540 }, { "epoch": 1.149329283725639, "grad_norm": 0.17686694860458374, "learning_rate": 1.4861829988575043e-05, "loss": 0.4954, "step": 4541 }, { "epoch": 1.14958238420653, "grad_norm": 0.16097335517406464, "learning_rate": 1.4859737552311275e-05, "loss": 0.4896, "step": 4542 }, { "epoch": 1.1498354846874208, "grad_norm": 0.1498359888792038, "learning_rate": 1.4857644837449061e-05, "loss": 0.4856, "step": 4543 }, { "epoch": 1.1500885851683118, "grad_norm": 0.15344619750976562, "learning_rate": 1.4855551844108376e-05, "loss": 0.4786, "step": 4544 }, { "epoch": 1.1503416856492028, "grad_norm": 0.1493581086397171, "learning_rate": 1.48534585724092e-05, "loss": 0.5018, "step": 4545 }, { "epoch": 1.1505947861300936, "grad_norm": 0.15186257660388947, "learning_rate": 1.4851365022471543e-05, "loss": 0.5143, "step": 4546 }, { "epoch": 1.1508478866109846, "grad_norm": 0.16766849160194397, "learning_rate": 1.4849271194415418e-05, "loss": 0.4743, "step": 4547 }, { "epoch": 1.1511009870918754, "grad_norm": 0.15675348043441772, "learning_rate": 1.4847177088360862e-05, "loss": 0.4928, "step": 4548 }, { "epoch": 1.1513540875727664, "grad_norm": 0.14489997923374176, "learning_rate": 1.4845082704427927e-05, "loss": 0.4928, "step": 4549 }, { "epoch": 1.1516071880536574, "grad_norm": 0.15403926372528076, "learning_rate": 1.4842988042736677e-05, "loss": 0.5183, "step": 4550 }, { "epoch": 1.1518602885345481, "grad_norm": 0.15352696180343628, "learning_rate": 1.4840893103407196e-05, "loss": 0.5223, "step": 4551 }, { "epoch": 1.1521133890154391, "grad_norm": 0.15442980825901031, "learning_rate": 1.483879788655958e-05, "loss": 0.507, "step": 4552 }, { "epoch": 1.1523664894963301, "grad_norm": 0.15698111057281494, "learning_rate": 1.4836702392313948e-05, "loss": 0.5119, "step": 4553 }, { "epoch": 1.152619589977221, "grad_norm": 0.15530253946781158, "learning_rate": 1.4834606620790428e-05, "loss": 0.4891, "step": 4554 }, { "epoch": 1.152872690458112, "grad_norm": 0.15658800303936005, "learning_rate": 1.4832510572109166e-05, "loss": 0.495, "step": 4555 }, { "epoch": 1.1531257909390027, "grad_norm": 0.15327274799346924, "learning_rate": 1.4830414246390322e-05, "loss": 0.5039, "step": 4556 }, { "epoch": 1.1533788914198937, "grad_norm": 0.15858817100524902, "learning_rate": 1.4828317643754076e-05, "loss": 0.4973, "step": 4557 }, { "epoch": 1.1536319919007847, "grad_norm": 0.15560759603977203, "learning_rate": 1.4826220764320622e-05, "loss": 0.5113, "step": 4558 }, { "epoch": 1.1538850923816755, "grad_norm": 0.15306459367275238, "learning_rate": 1.4824123608210169e-05, "loss": 0.4963, "step": 4559 }, { "epoch": 1.1541381928625665, "grad_norm": 1.0836349725723267, "learning_rate": 1.4822026175542943e-05, "loss": 0.4887, "step": 4560 }, { "epoch": 1.1543912933434575, "grad_norm": 0.15071672201156616, "learning_rate": 1.4819928466439184e-05, "loss": 0.5087, "step": 4561 }, { "epoch": 1.1546443938243482, "grad_norm": 0.15151143074035645, "learning_rate": 1.4817830481019151e-05, "loss": 0.514, "step": 4562 }, { "epoch": 1.1548974943052392, "grad_norm": 0.15127737820148468, "learning_rate": 1.4815732219403116e-05, "loss": 0.4865, "step": 4563 }, { "epoch": 1.15515059478613, "grad_norm": 0.15976069867610931, "learning_rate": 1.4813633681711371e-05, "loss": 0.4988, "step": 4564 }, { "epoch": 1.155403695267021, "grad_norm": 0.16375644505023956, "learning_rate": 1.4811534868064215e-05, "loss": 0.5196, "step": 4565 }, { "epoch": 1.155656795747912, "grad_norm": 0.15645895898342133, "learning_rate": 1.480943577858197e-05, "loss": 0.5148, "step": 4566 }, { "epoch": 1.1559098962288028, "grad_norm": 0.1529003381729126, "learning_rate": 1.4807336413384977e-05, "loss": 0.5139, "step": 4567 }, { "epoch": 1.1561629967096938, "grad_norm": 0.15350906550884247, "learning_rate": 1.4805236772593583e-05, "loss": 0.511, "step": 4568 }, { "epoch": 1.1564160971905846, "grad_norm": 0.15559755265712738, "learning_rate": 1.4803136856328157e-05, "loss": 0.4993, "step": 4569 }, { "epoch": 1.1566691976714756, "grad_norm": 0.15251141786575317, "learning_rate": 1.4801036664709088e-05, "loss": 0.4748, "step": 4570 }, { "epoch": 1.1569222981523666, "grad_norm": 0.15518489480018616, "learning_rate": 1.4798936197856768e-05, "loss": 0.4948, "step": 4571 }, { "epoch": 1.1571753986332574, "grad_norm": 0.1537337303161621, "learning_rate": 1.4796835455891616e-05, "loss": 0.4966, "step": 4572 }, { "epoch": 1.1574284991141484, "grad_norm": 0.15317250788211823, "learning_rate": 1.4794734438934061e-05, "loss": 0.4801, "step": 4573 }, { "epoch": 1.1576815995950391, "grad_norm": 0.15824295580387115, "learning_rate": 1.4792633147104554e-05, "loss": 0.4829, "step": 4574 }, { "epoch": 1.1579347000759301, "grad_norm": 0.1529872715473175, "learning_rate": 1.4790531580523553e-05, "loss": 0.4915, "step": 4575 }, { "epoch": 1.1581878005568211, "grad_norm": 0.1559661626815796, "learning_rate": 1.478842973931154e-05, "loss": 0.5002, "step": 4576 }, { "epoch": 1.158440901037712, "grad_norm": 0.15457703173160553, "learning_rate": 1.4786327623589008e-05, "loss": 0.4827, "step": 4577 }, { "epoch": 1.158694001518603, "grad_norm": 0.15452201664447784, "learning_rate": 1.4784225233476463e-05, "loss": 0.486, "step": 4578 }, { "epoch": 1.1589471019994937, "grad_norm": 0.15270322561264038, "learning_rate": 1.4782122569094438e-05, "loss": 0.4797, "step": 4579 }, { "epoch": 1.1592002024803847, "grad_norm": 0.15560272336006165, "learning_rate": 1.4780019630563467e-05, "loss": 0.508, "step": 4580 }, { "epoch": 1.1594533029612757, "grad_norm": 0.14981728792190552, "learning_rate": 1.477791641800411e-05, "loss": 0.465, "step": 4581 }, { "epoch": 1.1597064034421665, "grad_norm": 0.15546533465385437, "learning_rate": 1.4775812931536943e-05, "loss": 0.4903, "step": 4582 }, { "epoch": 1.1599595039230575, "grad_norm": 0.1531168669462204, "learning_rate": 1.4773709171282549e-05, "loss": 0.4801, "step": 4583 }, { "epoch": 1.1602126044039482, "grad_norm": 0.15441486239433289, "learning_rate": 1.4771605137361534e-05, "loss": 0.4809, "step": 4584 }, { "epoch": 1.1604657048848392, "grad_norm": 0.15585966408252716, "learning_rate": 1.4769500829894521e-05, "loss": 0.4929, "step": 4585 }, { "epoch": 1.1607188053657302, "grad_norm": 0.156850203871727, "learning_rate": 1.476739624900214e-05, "loss": 0.4717, "step": 4586 }, { "epoch": 1.160971905846621, "grad_norm": 0.15403494238853455, "learning_rate": 1.4765291394805046e-05, "loss": 0.4951, "step": 4587 }, { "epoch": 1.161225006327512, "grad_norm": 0.15185825526714325, "learning_rate": 1.4763186267423903e-05, "loss": 0.5, "step": 4588 }, { "epoch": 1.161478106808403, "grad_norm": 0.16162091493606567, "learning_rate": 1.4761080866979396e-05, "loss": 0.5429, "step": 4589 }, { "epoch": 1.1617312072892938, "grad_norm": 0.15204273164272308, "learning_rate": 1.4758975193592225e-05, "loss": 0.4966, "step": 4590 }, { "epoch": 1.1619843077701848, "grad_norm": 0.15539345145225525, "learning_rate": 1.4756869247383097e-05, "loss": 0.5023, "step": 4591 }, { "epoch": 1.1622374082510758, "grad_norm": 0.1623717099428177, "learning_rate": 1.4754763028472747e-05, "loss": 0.5035, "step": 4592 }, { "epoch": 1.1624905087319666, "grad_norm": 0.15364901721477509, "learning_rate": 1.475265653698192e-05, "loss": 0.4876, "step": 4593 }, { "epoch": 1.1627436092128576, "grad_norm": 0.158247172832489, "learning_rate": 1.4750549773031373e-05, "loss": 0.5124, "step": 4594 }, { "epoch": 1.1629967096937484, "grad_norm": 0.15751482546329498, "learning_rate": 1.4748442736741886e-05, "loss": 0.5058, "step": 4595 }, { "epoch": 1.1632498101746394, "grad_norm": 0.15057843923568726, "learning_rate": 1.4746335428234251e-05, "loss": 0.4956, "step": 4596 }, { "epoch": 1.1635029106555304, "grad_norm": 0.15370962023735046, "learning_rate": 1.4744227847629271e-05, "loss": 0.4831, "step": 4597 }, { "epoch": 1.1637560111364211, "grad_norm": 0.1534753441810608, "learning_rate": 1.4742119995047771e-05, "loss": 0.481, "step": 4598 }, { "epoch": 1.1640091116173121, "grad_norm": 0.16202667355537415, "learning_rate": 1.4740011870610598e-05, "loss": 0.5212, "step": 4599 }, { "epoch": 1.164262212098203, "grad_norm": 0.1575966477394104, "learning_rate": 1.4737903474438593e-05, "loss": 0.4852, "step": 4600 }, { "epoch": 1.164515312579094, "grad_norm": 0.16322316229343414, "learning_rate": 1.4735794806652634e-05, "loss": 0.5177, "step": 4601 }, { "epoch": 1.164768413059985, "grad_norm": 0.1518232673406601, "learning_rate": 1.4733685867373603e-05, "loss": 0.4991, "step": 4602 }, { "epoch": 1.1650215135408757, "grad_norm": 0.15305112302303314, "learning_rate": 1.4731576656722404e-05, "loss": 0.5187, "step": 4603 }, { "epoch": 1.1652746140217667, "grad_norm": 0.157497838139534, "learning_rate": 1.4729467174819953e-05, "loss": 0.4904, "step": 4604 }, { "epoch": 1.1655277145026575, "grad_norm": 0.16192995011806488, "learning_rate": 1.4727357421787178e-05, "loss": 0.5038, "step": 4605 }, { "epoch": 1.1657808149835485, "grad_norm": 0.15403158962726593, "learning_rate": 1.472524739774503e-05, "loss": 0.4893, "step": 4606 }, { "epoch": 1.1660339154644395, "grad_norm": 0.1589723527431488, "learning_rate": 1.4723137102814478e-05, "loss": 0.486, "step": 4607 }, { "epoch": 1.1662870159453302, "grad_norm": 0.15729570388793945, "learning_rate": 1.472102653711649e-05, "loss": 0.4998, "step": 4608 }, { "epoch": 1.1665401164262212, "grad_norm": 0.14842107892036438, "learning_rate": 1.4718915700772065e-05, "loss": 0.482, "step": 4609 }, { "epoch": 1.166793216907112, "grad_norm": 0.15726573765277863, "learning_rate": 1.4716804593902215e-05, "loss": 0.5216, "step": 4610 }, { "epoch": 1.167046317388003, "grad_norm": 0.16227351129055023, "learning_rate": 1.471469321662796e-05, "loss": 0.4981, "step": 4611 }, { "epoch": 1.167299417868894, "grad_norm": 0.15464884042739868, "learning_rate": 1.4712581569070348e-05, "loss": 0.4832, "step": 4612 }, { "epoch": 1.1675525183497848, "grad_norm": 0.1531009078025818, "learning_rate": 1.4710469651350428e-05, "loss": 0.506, "step": 4613 }, { "epoch": 1.1678056188306758, "grad_norm": 0.1521708369255066, "learning_rate": 1.4708357463589276e-05, "loss": 0.4827, "step": 4614 }, { "epoch": 1.1680587193115666, "grad_norm": 0.15431474149227142, "learning_rate": 1.4706245005907978e-05, "loss": 0.4898, "step": 4615 }, { "epoch": 1.1683118197924576, "grad_norm": 0.152098149061203, "learning_rate": 1.4704132278427636e-05, "loss": 0.4859, "step": 4616 }, { "epoch": 1.1685649202733486, "grad_norm": 0.15201321244239807, "learning_rate": 1.470201928126937e-05, "loss": 0.4971, "step": 4617 }, { "epoch": 1.1688180207542394, "grad_norm": 0.1494446098804474, "learning_rate": 1.4699906014554314e-05, "loss": 0.5024, "step": 4618 }, { "epoch": 1.1690711212351304, "grad_norm": 0.20589153468608856, "learning_rate": 1.4697792478403612e-05, "loss": 0.4663, "step": 4619 }, { "epoch": 1.1693242217160214, "grad_norm": 0.1513860523700714, "learning_rate": 1.4695678672938436e-05, "loss": 0.4981, "step": 4620 }, { "epoch": 1.1695773221969121, "grad_norm": 0.19068969786167145, "learning_rate": 1.4693564598279961e-05, "loss": 0.4948, "step": 4621 }, { "epoch": 1.1698304226778031, "grad_norm": 0.1580287367105484, "learning_rate": 1.4691450254549384e-05, "loss": 0.5017, "step": 4622 }, { "epoch": 1.170083523158694, "grad_norm": 0.15604044497013092, "learning_rate": 1.4689335641867918e-05, "loss": 0.4976, "step": 4623 }, { "epoch": 1.170336623639585, "grad_norm": 0.15767541527748108, "learning_rate": 1.4687220760356784e-05, "loss": 0.5202, "step": 4624 }, { "epoch": 1.170589724120476, "grad_norm": 0.1550685167312622, "learning_rate": 1.4685105610137225e-05, "loss": 0.5085, "step": 4625 }, { "epoch": 1.1708428246013667, "grad_norm": 0.15496826171875, "learning_rate": 1.4682990191330503e-05, "loss": 0.4916, "step": 4626 }, { "epoch": 1.1710959250822577, "grad_norm": 0.1587195247411728, "learning_rate": 1.4680874504057885e-05, "loss": 0.5278, "step": 4627 }, { "epoch": 1.1713490255631487, "grad_norm": 0.1608172208070755, "learning_rate": 1.4678758548440661e-05, "loss": 0.4943, "step": 4628 }, { "epoch": 1.1716021260440395, "grad_norm": 0.15505562722682953, "learning_rate": 1.4676642324600136e-05, "loss": 0.4917, "step": 4629 }, { "epoch": 1.1718552265249305, "grad_norm": 0.17707034945487976, "learning_rate": 1.4674525832657627e-05, "loss": 0.4992, "step": 4630 }, { "epoch": 1.1721083270058212, "grad_norm": 0.15662218630313873, "learning_rate": 1.4672409072734467e-05, "loss": 0.5099, "step": 4631 }, { "epoch": 1.1723614274867122, "grad_norm": 0.1529902219772339, "learning_rate": 1.4670292044952004e-05, "loss": 0.4962, "step": 4632 }, { "epoch": 1.1726145279676032, "grad_norm": 0.15202829241752625, "learning_rate": 1.4668174749431608e-05, "loss": 0.4816, "step": 4633 }, { "epoch": 1.172867628448494, "grad_norm": 0.15931326150894165, "learning_rate": 1.4666057186294654e-05, "loss": 0.4933, "step": 4634 }, { "epoch": 1.173120728929385, "grad_norm": 0.15990197658538818, "learning_rate": 1.466393935566254e-05, "loss": 0.5263, "step": 4635 }, { "epoch": 1.1733738294102758, "grad_norm": 0.15802448987960815, "learning_rate": 1.4661821257656674e-05, "loss": 0.5032, "step": 4636 }, { "epoch": 1.1736269298911668, "grad_norm": 0.15246592462062836, "learning_rate": 1.4659702892398487e-05, "loss": 0.5109, "step": 4637 }, { "epoch": 1.1738800303720578, "grad_norm": 0.15437988936901093, "learning_rate": 1.4657584260009416e-05, "loss": 0.4784, "step": 4638 }, { "epoch": 1.1741331308529486, "grad_norm": 0.1609841138124466, "learning_rate": 1.4655465360610921e-05, "loss": 0.4795, "step": 4639 }, { "epoch": 1.1743862313338396, "grad_norm": 0.1602625846862793, "learning_rate": 1.4653346194324474e-05, "loss": 0.5123, "step": 4640 }, { "epoch": 1.1746393318147303, "grad_norm": 0.1529168039560318, "learning_rate": 1.4651226761271555e-05, "loss": 0.5066, "step": 4641 }, { "epoch": 1.1748924322956213, "grad_norm": 0.1534958779811859, "learning_rate": 1.4649107061573675e-05, "loss": 0.5043, "step": 4642 }, { "epoch": 1.1751455327765123, "grad_norm": 0.14956241846084595, "learning_rate": 1.464698709535235e-05, "loss": 0.4954, "step": 4643 }, { "epoch": 1.1753986332574031, "grad_norm": 0.1553228348493576, "learning_rate": 1.464486686272911e-05, "loss": 0.5025, "step": 4644 }, { "epoch": 1.1756517337382941, "grad_norm": 0.15142814815044403, "learning_rate": 1.4642746363825507e-05, "loss": 0.5227, "step": 4645 }, { "epoch": 1.175904834219185, "grad_norm": 0.15827877819538116, "learning_rate": 1.4640625598763105e-05, "loss": 0.4856, "step": 4646 }, { "epoch": 1.176157934700076, "grad_norm": 0.16641870141029358, "learning_rate": 1.4638504567663477e-05, "loss": 0.4701, "step": 4647 }, { "epoch": 1.176411035180967, "grad_norm": 0.150115966796875, "learning_rate": 1.4636383270648224e-05, "loss": 0.4829, "step": 4648 }, { "epoch": 1.1766641356618577, "grad_norm": 0.15478096902370453, "learning_rate": 1.4634261707838956e-05, "loss": 0.5106, "step": 4649 }, { "epoch": 1.1769172361427487, "grad_norm": 0.15714257955551147, "learning_rate": 1.4632139879357288e-05, "loss": 0.4976, "step": 4650 }, { "epoch": 1.1771703366236397, "grad_norm": 0.1560010313987732, "learning_rate": 1.4630017785324873e-05, "loss": 0.4852, "step": 4651 }, { "epoch": 1.1774234371045305, "grad_norm": 0.1579611599445343, "learning_rate": 1.4627895425863357e-05, "loss": 0.5034, "step": 4652 }, { "epoch": 1.1776765375854215, "grad_norm": 0.16136674582958221, "learning_rate": 1.4625772801094413e-05, "loss": 0.5191, "step": 4653 }, { "epoch": 1.1779296380663122, "grad_norm": 0.15910863876342773, "learning_rate": 1.4623649911139729e-05, "loss": 0.502, "step": 4654 }, { "epoch": 1.1781827385472032, "grad_norm": 0.15608109533786774, "learning_rate": 1.4621526756120998e-05, "loss": 0.5265, "step": 4655 }, { "epoch": 1.1784358390280942, "grad_norm": 0.1549849808216095, "learning_rate": 1.4619403336159946e-05, "loss": 0.4834, "step": 4656 }, { "epoch": 1.178688939508985, "grad_norm": 0.15216386318206787, "learning_rate": 1.4617279651378298e-05, "loss": 0.4967, "step": 4657 }, { "epoch": 1.178942039989876, "grad_norm": 0.15116731822490692, "learning_rate": 1.4615155701897803e-05, "loss": 0.4936, "step": 4658 }, { "epoch": 1.179195140470767, "grad_norm": 0.15174581110477448, "learning_rate": 1.4613031487840222e-05, "loss": 0.4887, "step": 4659 }, { "epoch": 1.1794482409516578, "grad_norm": 0.15367329120635986, "learning_rate": 1.4610907009327329e-05, "loss": 0.5121, "step": 4660 }, { "epoch": 1.1797013414325488, "grad_norm": 0.15839235484600067, "learning_rate": 1.460878226648092e-05, "loss": 0.4789, "step": 4661 }, { "epoch": 1.1799544419134396, "grad_norm": 0.15158912539482117, "learning_rate": 1.4606657259422798e-05, "loss": 0.5042, "step": 4662 }, { "epoch": 1.1802075423943306, "grad_norm": 0.1605156809091568, "learning_rate": 1.4604531988274789e-05, "loss": 0.4878, "step": 4663 }, { "epoch": 1.1804606428752216, "grad_norm": 0.1494605988264084, "learning_rate": 1.4602406453158726e-05, "loss": 0.4843, "step": 4664 }, { "epoch": 1.1807137433561123, "grad_norm": 0.14931213855743408, "learning_rate": 1.4600280654196468e-05, "loss": 0.509, "step": 4665 }, { "epoch": 1.1809668438370033, "grad_norm": 0.15375261008739471, "learning_rate": 1.4598154591509874e-05, "loss": 0.4927, "step": 4666 }, { "epoch": 1.1812199443178941, "grad_norm": 0.15801472961902618, "learning_rate": 1.4596028265220833e-05, "loss": 0.47, "step": 4667 }, { "epoch": 1.1814730447987851, "grad_norm": 0.15550629794597626, "learning_rate": 1.4593901675451244e-05, "loss": 0.4987, "step": 4668 }, { "epoch": 1.1817261452796761, "grad_norm": 0.1520007997751236, "learning_rate": 1.4591774822323011e-05, "loss": 0.5011, "step": 4669 }, { "epoch": 1.181979245760567, "grad_norm": 0.1579056680202484, "learning_rate": 1.458964770595807e-05, "loss": 0.5339, "step": 4670 }, { "epoch": 1.182232346241458, "grad_norm": 0.15460114181041718, "learning_rate": 1.4587520326478363e-05, "loss": 0.4963, "step": 4671 }, { "epoch": 1.1824854467223487, "grad_norm": 0.1590864509344101, "learning_rate": 1.4585392684005847e-05, "loss": 0.4982, "step": 4672 }, { "epoch": 1.1827385472032397, "grad_norm": 0.15025806427001953, "learning_rate": 1.4583264778662494e-05, "loss": 0.5246, "step": 4673 }, { "epoch": 1.1829916476841307, "grad_norm": 0.16256114840507507, "learning_rate": 1.4581136610570292e-05, "loss": 0.5047, "step": 4674 }, { "epoch": 1.1832447481650215, "grad_norm": 0.16304293274879456, "learning_rate": 1.4579008179851248e-05, "loss": 0.4969, "step": 4675 }, { "epoch": 1.1834978486459125, "grad_norm": 0.15595556795597076, "learning_rate": 1.4576879486627377e-05, "loss": 0.5206, "step": 4676 }, { "epoch": 1.1837509491268032, "grad_norm": 0.15133045613765717, "learning_rate": 1.4574750531020716e-05, "loss": 0.5005, "step": 4677 }, { "epoch": 1.1840040496076942, "grad_norm": 0.14956128597259521, "learning_rate": 1.4572621313153308e-05, "loss": 0.5143, "step": 4678 }, { "epoch": 1.1842571500885852, "grad_norm": 0.15302643179893494, "learning_rate": 1.4570491833147222e-05, "loss": 0.5215, "step": 4679 }, { "epoch": 1.184510250569476, "grad_norm": 0.15235577523708344, "learning_rate": 1.4568362091124533e-05, "loss": 0.5036, "step": 4680 }, { "epoch": 1.184763351050367, "grad_norm": 0.14849554002285004, "learning_rate": 1.4566232087207336e-05, "loss": 0.4713, "step": 4681 }, { "epoch": 1.1850164515312578, "grad_norm": 0.15594588220119476, "learning_rate": 1.4564101821517743e-05, "loss": 0.4817, "step": 4682 }, { "epoch": 1.1852695520121488, "grad_norm": 0.1528516709804535, "learning_rate": 1.4561971294177871e-05, "loss": 0.5126, "step": 4683 }, { "epoch": 1.1855226524930398, "grad_norm": 0.15339314937591553, "learning_rate": 1.4559840505309863e-05, "loss": 0.5004, "step": 4684 }, { "epoch": 1.1857757529739306, "grad_norm": 0.1553167998790741, "learning_rate": 1.4557709455035868e-05, "loss": 0.5116, "step": 4685 }, { "epoch": 1.1860288534548216, "grad_norm": 0.1590556800365448, "learning_rate": 1.455557814347806e-05, "loss": 0.5135, "step": 4686 }, { "epoch": 1.1862819539357126, "grad_norm": 0.1556331068277359, "learning_rate": 1.4553446570758624e-05, "loss": 0.5025, "step": 4687 }, { "epoch": 1.1865350544166033, "grad_norm": 0.14832580089569092, "learning_rate": 1.4551314736999748e-05, "loss": 0.5037, "step": 4688 }, { "epoch": 1.1867881548974943, "grad_norm": 0.1580008715391159, "learning_rate": 1.4549182642323658e-05, "loss": 0.4987, "step": 4689 }, { "epoch": 1.1870412553783853, "grad_norm": 0.15422558784484863, "learning_rate": 1.4547050286852578e-05, "loss": 0.4952, "step": 4690 }, { "epoch": 1.1872943558592761, "grad_norm": 0.15255515277385712, "learning_rate": 1.4544917670708745e-05, "loss": 0.4823, "step": 4691 }, { "epoch": 1.1875474563401671, "grad_norm": 0.1530275195837021, "learning_rate": 1.4542784794014426e-05, "loss": 0.495, "step": 4692 }, { "epoch": 1.187800556821058, "grad_norm": 0.15708014369010925, "learning_rate": 1.4540651656891893e-05, "loss": 0.4875, "step": 4693 }, { "epoch": 1.188053657301949, "grad_norm": 0.15858565270900726, "learning_rate": 1.453851825946343e-05, "loss": 0.5042, "step": 4694 }, { "epoch": 1.18830675778284, "grad_norm": 0.1766078770160675, "learning_rate": 1.453638460185134e-05, "loss": 0.5106, "step": 4695 }, { "epoch": 1.1885598582637307, "grad_norm": 0.16083456575870514, "learning_rate": 1.4534250684177948e-05, "loss": 0.4837, "step": 4696 }, { "epoch": 1.1888129587446217, "grad_norm": 0.1615104079246521, "learning_rate": 1.453211650656558e-05, "loss": 0.4975, "step": 4697 }, { "epoch": 1.1890660592255125, "grad_norm": 0.1543828248977661, "learning_rate": 1.452998206913659e-05, "loss": 0.4979, "step": 4698 }, { "epoch": 1.1893191597064035, "grad_norm": 0.15214140713214874, "learning_rate": 1.4527847372013334e-05, "loss": 0.4765, "step": 4699 }, { "epoch": 1.1895722601872945, "grad_norm": 0.15418758988380432, "learning_rate": 1.4525712415318194e-05, "loss": 0.4868, "step": 4700 }, { "epoch": 1.1898253606681852, "grad_norm": 0.15414832532405853, "learning_rate": 1.452357719917356e-05, "loss": 0.4999, "step": 4701 }, { "epoch": 1.1900784611490762, "grad_norm": 0.1528444141149521, "learning_rate": 1.4521441723701842e-05, "loss": 0.5153, "step": 4702 }, { "epoch": 1.190331561629967, "grad_norm": 0.14918316900730133, "learning_rate": 1.4519305989025458e-05, "loss": 0.5083, "step": 4703 }, { "epoch": 1.190584662110858, "grad_norm": 0.15229432284832, "learning_rate": 1.4517169995266851e-05, "loss": 0.4605, "step": 4704 }, { "epoch": 1.190837762591749, "grad_norm": 0.157566636800766, "learning_rate": 1.451503374254847e-05, "loss": 0.4886, "step": 4705 }, { "epoch": 1.1910908630726398, "grad_norm": 0.1592397838830948, "learning_rate": 1.4512897230992781e-05, "loss": 0.4801, "step": 4706 }, { "epoch": 1.1913439635535308, "grad_norm": 0.1572003960609436, "learning_rate": 1.4510760460722268e-05, "loss": 0.4754, "step": 4707 }, { "epoch": 1.1915970640344216, "grad_norm": 0.15613478422164917, "learning_rate": 1.4508623431859428e-05, "loss": 0.4982, "step": 4708 }, { "epoch": 1.1918501645153126, "grad_norm": 0.15019828081130981, "learning_rate": 1.450648614452677e-05, "loss": 0.5058, "step": 4709 }, { "epoch": 1.1921032649962036, "grad_norm": 0.15565308928489685, "learning_rate": 1.4504348598846816e-05, "loss": 0.4813, "step": 4710 }, { "epoch": 1.1923563654770943, "grad_norm": 0.15263988077640533, "learning_rate": 1.4502210794942114e-05, "loss": 0.4587, "step": 4711 }, { "epoch": 1.1926094659579853, "grad_norm": 0.1569148153066635, "learning_rate": 1.450007273293522e-05, "loss": 0.4918, "step": 4712 }, { "epoch": 1.1928625664388761, "grad_norm": 0.1531478762626648, "learning_rate": 1.4497934412948698e-05, "loss": 0.4865, "step": 4713 }, { "epoch": 1.1931156669197671, "grad_norm": 0.1563832312822342, "learning_rate": 1.449579583510514e-05, "loss": 0.4905, "step": 4714 }, { "epoch": 1.1933687674006581, "grad_norm": 0.15684813261032104, "learning_rate": 1.4493656999527144e-05, "loss": 0.49, "step": 4715 }, { "epoch": 1.193621867881549, "grad_norm": 0.1561594158411026, "learning_rate": 1.4491517906337325e-05, "loss": 0.4861, "step": 4716 }, { "epoch": 1.19387496836244, "grad_norm": 0.15557099878787994, "learning_rate": 1.4489378555658306e-05, "loss": 0.5044, "step": 4717 }, { "epoch": 1.194128068843331, "grad_norm": 0.15847542881965637, "learning_rate": 1.4487238947612743e-05, "loss": 0.5034, "step": 4718 }, { "epoch": 1.1943811693242217, "grad_norm": 0.1629718393087387, "learning_rate": 1.448509908232329e-05, "loss": 0.4867, "step": 4719 }, { "epoch": 1.1946342698051127, "grad_norm": 0.15843944251537323, "learning_rate": 1.4482958959912619e-05, "loss": 0.4911, "step": 4720 }, { "epoch": 1.1948873702860034, "grad_norm": 0.16090792417526245, "learning_rate": 1.448081858050342e-05, "loss": 0.5183, "step": 4721 }, { "epoch": 1.1951404707668944, "grad_norm": 0.15191036462783813, "learning_rate": 1.4478677944218395e-05, "loss": 0.4988, "step": 4722 }, { "epoch": 1.1953935712477854, "grad_norm": 0.15470080077648163, "learning_rate": 1.4476537051180266e-05, "loss": 0.5031, "step": 4723 }, { "epoch": 1.1956466717286762, "grad_norm": 0.15816397964954376, "learning_rate": 1.4474395901511765e-05, "loss": 0.5151, "step": 4724 }, { "epoch": 1.1958997722095672, "grad_norm": 0.1561191976070404, "learning_rate": 1.4472254495335636e-05, "loss": 0.5007, "step": 4725 }, { "epoch": 1.1961528726904582, "grad_norm": 0.1548040509223938, "learning_rate": 1.4470112832774644e-05, "loss": 0.5091, "step": 4726 }, { "epoch": 1.196405973171349, "grad_norm": 0.333318293094635, "learning_rate": 1.4467970913951566e-05, "loss": 0.5091, "step": 4727 }, { "epoch": 1.19665907365224, "grad_norm": 0.15379349887371063, "learning_rate": 1.4465828738989192e-05, "loss": 0.5026, "step": 4728 }, { "epoch": 1.1969121741331308, "grad_norm": 0.15740284323692322, "learning_rate": 1.446368630801033e-05, "loss": 0.4874, "step": 4729 }, { "epoch": 1.1971652746140218, "grad_norm": 0.15127934515476227, "learning_rate": 1.4461543621137801e-05, "loss": 0.4825, "step": 4730 }, { "epoch": 1.1974183750949128, "grad_norm": 0.15321731567382812, "learning_rate": 1.445940067849444e-05, "loss": 0.4885, "step": 4731 }, { "epoch": 1.1976714755758036, "grad_norm": 0.1696755588054657, "learning_rate": 1.4457257480203097e-05, "loss": 0.5135, "step": 4732 }, { "epoch": 1.1979245760566946, "grad_norm": 0.1582747995853424, "learning_rate": 1.4455114026386638e-05, "loss": 0.5054, "step": 4733 }, { "epoch": 1.1981776765375853, "grad_norm": 0.1582452952861786, "learning_rate": 1.4452970317167945e-05, "loss": 0.495, "step": 4734 }, { "epoch": 1.1984307770184763, "grad_norm": 0.14962342381477356, "learning_rate": 1.4450826352669904e-05, "loss": 0.4812, "step": 4735 }, { "epoch": 1.1986838774993673, "grad_norm": 0.15218226611614227, "learning_rate": 1.4448682133015432e-05, "loss": 0.4892, "step": 4736 }, { "epoch": 1.1989369779802581, "grad_norm": 0.16618536412715912, "learning_rate": 1.4446537658327455e-05, "loss": 0.5161, "step": 4737 }, { "epoch": 1.1991900784611491, "grad_norm": 0.16231316328048706, "learning_rate": 1.44443929287289e-05, "loss": 0.5228, "step": 4738 }, { "epoch": 1.19944317894204, "grad_norm": 0.15867680311203003, "learning_rate": 1.4442247944342727e-05, "loss": 0.4949, "step": 4739 }, { "epoch": 1.199696279422931, "grad_norm": 0.15078295767307281, "learning_rate": 1.4440102705291905e-05, "loss": 0.468, "step": 4740 }, { "epoch": 1.1999493799038219, "grad_norm": 0.16110962629318237, "learning_rate": 1.443795721169941e-05, "loss": 0.5009, "step": 4741 }, { "epoch": 1.2002024803847127, "grad_norm": 0.15197481215000153, "learning_rate": 1.4435811463688243e-05, "loss": 0.4978, "step": 4742 }, { "epoch": 1.2004555808656037, "grad_norm": 0.1574021279811859, "learning_rate": 1.4433665461381416e-05, "loss": 0.5039, "step": 4743 }, { "epoch": 1.2007086813464944, "grad_norm": 0.14959901571273804, "learning_rate": 1.4431519204901952e-05, "loss": 0.5024, "step": 4744 }, { "epoch": 1.2009617818273854, "grad_norm": 0.1517789661884308, "learning_rate": 1.4429372694372892e-05, "loss": 0.5026, "step": 4745 }, { "epoch": 1.2012148823082764, "grad_norm": 0.14926299452781677, "learning_rate": 1.4427225929917293e-05, "loss": 0.4796, "step": 4746 }, { "epoch": 1.2014679827891672, "grad_norm": 0.15355639159679413, "learning_rate": 1.4425078911658222e-05, "loss": 0.4987, "step": 4747 }, { "epoch": 1.2017210832700582, "grad_norm": 0.1582934558391571, "learning_rate": 1.442293163971876e-05, "loss": 0.5175, "step": 4748 }, { "epoch": 1.2019741837509492, "grad_norm": 0.15438798069953918, "learning_rate": 1.4420784114222012e-05, "loss": 0.478, "step": 4749 }, { "epoch": 1.20222728423184, "grad_norm": 0.15687960386276245, "learning_rate": 1.441863633529109e-05, "loss": 0.4867, "step": 4750 }, { "epoch": 1.202480384712731, "grad_norm": 0.16872043907642365, "learning_rate": 1.4416488303049117e-05, "loss": 0.4983, "step": 4751 }, { "epoch": 1.2027334851936218, "grad_norm": 0.1515536606311798, "learning_rate": 1.4414340017619239e-05, "loss": 0.5239, "step": 4752 }, { "epoch": 1.2029865856745128, "grad_norm": 0.164213627576828, "learning_rate": 1.4412191479124613e-05, "loss": 0.531, "step": 4753 }, { "epoch": 1.2032396861554038, "grad_norm": 0.15183062851428986, "learning_rate": 1.4410042687688405e-05, "loss": 0.4636, "step": 4754 }, { "epoch": 1.2034927866362946, "grad_norm": 0.15222394466400146, "learning_rate": 1.440789364343381e-05, "loss": 0.5133, "step": 4755 }, { "epoch": 1.2037458871171856, "grad_norm": 0.15674428641796112, "learning_rate": 1.4405744346484021e-05, "loss": 0.4982, "step": 4756 }, { "epoch": 1.2039989875980766, "grad_norm": 0.15081147849559784, "learning_rate": 1.4403594796962252e-05, "loss": 0.5196, "step": 4757 }, { "epoch": 1.2042520880789673, "grad_norm": 0.15732020139694214, "learning_rate": 1.4401444994991734e-05, "loss": 0.4954, "step": 4758 }, { "epoch": 1.2045051885598583, "grad_norm": 0.15141361951828003, "learning_rate": 1.4399294940695714e-05, "loss": 0.4821, "step": 4759 }, { "epoch": 1.204758289040749, "grad_norm": 0.20991456508636475, "learning_rate": 1.4397144634197446e-05, "loss": 0.4912, "step": 4760 }, { "epoch": 1.20501138952164, "grad_norm": 0.15904219448566437, "learning_rate": 1.4394994075620202e-05, "loss": 0.4718, "step": 4761 }, { "epoch": 1.205264490002531, "grad_norm": 0.15518103539943695, "learning_rate": 1.4392843265087273e-05, "loss": 0.508, "step": 4762 }, { "epoch": 1.2055175904834219, "grad_norm": 0.15759392082691193, "learning_rate": 1.4390692202721956e-05, "loss": 0.4959, "step": 4763 }, { "epoch": 1.2057706909643129, "grad_norm": 0.16693395376205444, "learning_rate": 1.4388540888647566e-05, "loss": 0.4791, "step": 4764 }, { "epoch": 1.2060237914452037, "grad_norm": 0.15320435166358948, "learning_rate": 1.4386389322987442e-05, "loss": 0.4877, "step": 4765 }, { "epoch": 1.2062768919260947, "grad_norm": 0.15438489615917206, "learning_rate": 1.438423750586492e-05, "loss": 0.5016, "step": 4766 }, { "epoch": 1.2065299924069857, "grad_norm": 0.15596912801265717, "learning_rate": 1.438208543740336e-05, "loss": 0.4897, "step": 4767 }, { "epoch": 1.2067830928878764, "grad_norm": 0.15400159358978271, "learning_rate": 1.4379933117726141e-05, "loss": 0.5016, "step": 4768 }, { "epoch": 1.2070361933687674, "grad_norm": 0.15354855358600616, "learning_rate": 1.4377780546956645e-05, "loss": 0.4888, "step": 4769 }, { "epoch": 1.2072892938496582, "grad_norm": 0.16207674145698547, "learning_rate": 1.4375627725218277e-05, "loss": 0.488, "step": 4770 }, { "epoch": 1.2075423943305492, "grad_norm": 0.15976330637931824, "learning_rate": 1.4373474652634452e-05, "loss": 0.4991, "step": 4771 }, { "epoch": 1.2077954948114402, "grad_norm": 0.15666639804840088, "learning_rate": 1.4371321329328605e-05, "loss": 0.4989, "step": 4772 }, { "epoch": 1.208048595292331, "grad_norm": 0.15730991959571838, "learning_rate": 1.4369167755424177e-05, "loss": 0.5074, "step": 4773 }, { "epoch": 1.208301695773222, "grad_norm": 0.18613293766975403, "learning_rate": 1.4367013931044632e-05, "loss": 0.5233, "step": 4774 }, { "epoch": 1.2085547962541128, "grad_norm": 0.1497037261724472, "learning_rate": 1.4364859856313438e-05, "loss": 0.4977, "step": 4775 }, { "epoch": 1.2088078967350038, "grad_norm": 0.16138841211795807, "learning_rate": 1.4362705531354089e-05, "loss": 0.5178, "step": 4776 }, { "epoch": 1.2090609972158948, "grad_norm": 0.15582409501075745, "learning_rate": 1.4360550956290087e-05, "loss": 0.4872, "step": 4777 }, { "epoch": 1.2093140976967856, "grad_norm": 0.16016504168510437, "learning_rate": 1.4358396131244947e-05, "loss": 0.4895, "step": 4778 }, { "epoch": 1.2095671981776766, "grad_norm": 0.1552554965019226, "learning_rate": 1.4356241056342204e-05, "loss": 0.5103, "step": 4779 }, { "epoch": 1.2098202986585673, "grad_norm": 0.18601174652576447, "learning_rate": 1.4354085731705398e-05, "loss": 0.4942, "step": 4780 }, { "epoch": 1.2100733991394583, "grad_norm": 0.15243934094905853, "learning_rate": 1.43519301574581e-05, "loss": 0.5002, "step": 4781 }, { "epoch": 1.2103264996203493, "grad_norm": 0.15824668109416962, "learning_rate": 1.4349774333723875e-05, "loss": 0.5174, "step": 4782 }, { "epoch": 1.21057960010124, "grad_norm": 0.15547820925712585, "learning_rate": 1.4347618260626307e-05, "loss": 0.485, "step": 4783 }, { "epoch": 1.210832700582131, "grad_norm": 0.1543138474225998, "learning_rate": 1.4345461938289016e-05, "loss": 0.4916, "step": 4784 }, { "epoch": 1.211085801063022, "grad_norm": 0.15523259341716766, "learning_rate": 1.4343305366835606e-05, "loss": 0.4844, "step": 4785 }, { "epoch": 1.2113389015439129, "grad_norm": 0.15505847334861755, "learning_rate": 1.4341148546389711e-05, "loss": 0.4998, "step": 4786 }, { "epoch": 1.2115920020248039, "grad_norm": 0.1591566652059555, "learning_rate": 1.4338991477074982e-05, "loss": 0.5224, "step": 4787 }, { "epoch": 1.2118451025056949, "grad_norm": 0.15748976171016693, "learning_rate": 1.4336834159015073e-05, "loss": 0.5003, "step": 4788 }, { "epoch": 1.2120982029865857, "grad_norm": 0.15617087483406067, "learning_rate": 1.433467659233366e-05, "loss": 0.4953, "step": 4789 }, { "epoch": 1.2123513034674767, "grad_norm": 0.1618586927652359, "learning_rate": 1.4332518777154438e-05, "loss": 0.5056, "step": 4790 }, { "epoch": 1.2126044039483674, "grad_norm": 0.15454545617103577, "learning_rate": 1.4330360713601102e-05, "loss": 0.4843, "step": 4791 }, { "epoch": 1.2128575044292584, "grad_norm": 0.1625894457101822, "learning_rate": 1.4328202401797371e-05, "loss": 0.5014, "step": 4792 }, { "epoch": 1.2131106049101494, "grad_norm": 0.16028928756713867, "learning_rate": 1.4326043841866979e-05, "loss": 0.5221, "step": 4793 }, { "epoch": 1.2133637053910402, "grad_norm": 0.1593734174966812, "learning_rate": 1.4323885033933669e-05, "loss": 0.5049, "step": 4794 }, { "epoch": 1.2136168058719312, "grad_norm": 0.15851549804210663, "learning_rate": 1.4321725978121202e-05, "loss": 0.5214, "step": 4795 }, { "epoch": 1.213869906352822, "grad_norm": 0.1454022228717804, "learning_rate": 1.431956667455335e-05, "loss": 0.4696, "step": 4796 }, { "epoch": 1.214123006833713, "grad_norm": 0.1527976393699646, "learning_rate": 1.4317407123353908e-05, "loss": 0.4976, "step": 4797 }, { "epoch": 1.214376107314604, "grad_norm": 0.15446287393569946, "learning_rate": 1.4315247324646673e-05, "loss": 0.4711, "step": 4798 }, { "epoch": 1.2146292077954948, "grad_norm": 0.15269722044467926, "learning_rate": 1.431308727855546e-05, "loss": 0.4884, "step": 4799 }, { "epoch": 1.2148823082763858, "grad_norm": 0.15966922044754028, "learning_rate": 1.4310926985204104e-05, "loss": 0.4924, "step": 4800 }, { "epoch": 1.2151354087572765, "grad_norm": 0.15202133357524872, "learning_rate": 1.4308766444716451e-05, "loss": 0.4944, "step": 4801 }, { "epoch": 1.2153885092381675, "grad_norm": 0.1693439781665802, "learning_rate": 1.4306605657216352e-05, "loss": 0.4961, "step": 4802 }, { "epoch": 1.2156416097190585, "grad_norm": 0.17105434834957123, "learning_rate": 1.4304444622827688e-05, "loss": 0.4785, "step": 4803 }, { "epoch": 1.2158947101999493, "grad_norm": 0.15646637976169586, "learning_rate": 1.4302283341674348e-05, "loss": 0.4768, "step": 4804 }, { "epoch": 1.2161478106808403, "grad_norm": 0.1583808958530426, "learning_rate": 1.4300121813880226e-05, "loss": 0.4839, "step": 4805 }, { "epoch": 1.216400911161731, "grad_norm": 0.15823723375797272, "learning_rate": 1.4297960039569246e-05, "loss": 0.5172, "step": 4806 }, { "epoch": 1.216654011642622, "grad_norm": 0.15845121443271637, "learning_rate": 1.4295798018865332e-05, "loss": 0.4937, "step": 4807 }, { "epoch": 1.216907112123513, "grad_norm": 0.15704643726348877, "learning_rate": 1.4293635751892427e-05, "loss": 0.5068, "step": 4808 }, { "epoch": 1.2171602126044039, "grad_norm": 0.15900589525699615, "learning_rate": 1.4291473238774496e-05, "loss": 0.488, "step": 4809 }, { "epoch": 1.2174133130852949, "grad_norm": 0.16417433321475983, "learning_rate": 1.4289310479635506e-05, "loss": 0.4835, "step": 4810 }, { "epoch": 1.2176664135661857, "grad_norm": 0.15618450939655304, "learning_rate": 1.4287147474599443e-05, "loss": 0.4814, "step": 4811 }, { "epoch": 1.2179195140470767, "grad_norm": 0.15520523488521576, "learning_rate": 1.4284984223790315e-05, "loss": 0.4947, "step": 4812 }, { "epoch": 1.2181726145279677, "grad_norm": 0.1571733057498932, "learning_rate": 1.4282820727332128e-05, "loss": 0.4834, "step": 4813 }, { "epoch": 1.2184257150088584, "grad_norm": 0.15829692780971527, "learning_rate": 1.4280656985348911e-05, "loss": 0.4842, "step": 4814 }, { "epoch": 1.2186788154897494, "grad_norm": 0.15376582741737366, "learning_rate": 1.4278492997964712e-05, "loss": 0.4876, "step": 4815 }, { "epoch": 1.2189319159706404, "grad_norm": 0.15035538375377655, "learning_rate": 1.4276328765303584e-05, "loss": 0.4874, "step": 4816 }, { "epoch": 1.2191850164515312, "grad_norm": 0.1485273540019989, "learning_rate": 1.42741642874896e-05, "loss": 0.493, "step": 4817 }, { "epoch": 1.2194381169324222, "grad_norm": 0.15596747398376465, "learning_rate": 1.4271999564646844e-05, "loss": 0.4949, "step": 4818 }, { "epoch": 1.219691217413313, "grad_norm": 0.1539386361837387, "learning_rate": 1.4269834596899415e-05, "loss": 0.5077, "step": 4819 }, { "epoch": 1.219944317894204, "grad_norm": 0.16498354077339172, "learning_rate": 1.4267669384371426e-05, "loss": 0.5209, "step": 4820 }, { "epoch": 1.220197418375095, "grad_norm": 0.17116999626159668, "learning_rate": 1.4265503927187002e-05, "loss": 0.5093, "step": 4821 }, { "epoch": 1.2204505188559858, "grad_norm": 0.15928888320922852, "learning_rate": 1.4263338225470287e-05, "loss": 0.4988, "step": 4822 }, { "epoch": 1.2207036193368768, "grad_norm": 0.18627600371837616, "learning_rate": 1.4261172279345439e-05, "loss": 0.4752, "step": 4823 }, { "epoch": 1.2209567198177678, "grad_norm": 0.15341739356517792, "learning_rate": 1.4259006088936618e-05, "loss": 0.4896, "step": 4824 }, { "epoch": 1.2212098202986585, "grad_norm": 0.15962594747543335, "learning_rate": 1.4256839654368011e-05, "loss": 0.4671, "step": 4825 }, { "epoch": 1.2214629207795495, "grad_norm": 0.1591014266014099, "learning_rate": 1.4254672975763821e-05, "loss": 0.4949, "step": 4826 }, { "epoch": 1.2217160212604403, "grad_norm": 0.15782347321510315, "learning_rate": 1.4252506053248251e-05, "loss": 0.5099, "step": 4827 }, { "epoch": 1.2219691217413313, "grad_norm": 0.15230417251586914, "learning_rate": 1.4250338886945531e-05, "loss": 0.4867, "step": 4828 }, { "epoch": 1.2222222222222223, "grad_norm": 0.15019799768924713, "learning_rate": 1.4248171476979903e-05, "loss": 0.4655, "step": 4829 }, { "epoch": 1.222475322703113, "grad_norm": 0.15211421251296997, "learning_rate": 1.424600382347561e-05, "loss": 0.4916, "step": 4830 }, { "epoch": 1.222728423184004, "grad_norm": 0.15335409343242645, "learning_rate": 1.4243835926556928e-05, "loss": 0.4725, "step": 4831 }, { "epoch": 1.2229815236648949, "grad_norm": 0.15465988218784332, "learning_rate": 1.4241667786348132e-05, "loss": 0.5022, "step": 4832 }, { "epoch": 1.2232346241457859, "grad_norm": 0.15888820588588715, "learning_rate": 1.423949940297352e-05, "loss": 0.4822, "step": 4833 }, { "epoch": 1.2234877246266769, "grad_norm": 0.15422068536281586, "learning_rate": 1.4237330776557401e-05, "loss": 0.5246, "step": 4834 }, { "epoch": 1.2237408251075677, "grad_norm": 0.1552008092403412, "learning_rate": 1.4235161907224098e-05, "loss": 0.512, "step": 4835 }, { "epoch": 1.2239939255884587, "grad_norm": 0.15023422241210938, "learning_rate": 1.4232992795097944e-05, "loss": 0.497, "step": 4836 }, { "epoch": 1.2242470260693494, "grad_norm": 0.15673129260540009, "learning_rate": 1.4230823440303293e-05, "loss": 0.5032, "step": 4837 }, { "epoch": 1.2245001265502404, "grad_norm": 0.15330225229263306, "learning_rate": 1.422865384296451e-05, "loss": 0.4775, "step": 4838 }, { "epoch": 1.2247532270311314, "grad_norm": 0.15421536564826965, "learning_rate": 1.4226484003205972e-05, "loss": 0.5463, "step": 4839 }, { "epoch": 1.2250063275120222, "grad_norm": 0.14725005626678467, "learning_rate": 1.4224313921152072e-05, "loss": 0.4962, "step": 4840 }, { "epoch": 1.2252594279929132, "grad_norm": 0.15515382587909698, "learning_rate": 1.4222143596927213e-05, "loss": 0.4686, "step": 4841 }, { "epoch": 1.225512528473804, "grad_norm": 0.1800537109375, "learning_rate": 1.4219973030655819e-05, "loss": 0.5015, "step": 4842 }, { "epoch": 1.225765628954695, "grad_norm": 0.15423104166984558, "learning_rate": 1.4217802222462323e-05, "loss": 0.4802, "step": 4843 }, { "epoch": 1.226018729435586, "grad_norm": 0.15170638263225555, "learning_rate": 1.4215631172471171e-05, "loss": 0.5129, "step": 4844 }, { "epoch": 1.2262718299164768, "grad_norm": 0.16045807301998138, "learning_rate": 1.4213459880806832e-05, "loss": 0.4907, "step": 4845 }, { "epoch": 1.2265249303973678, "grad_norm": 0.1591181755065918, "learning_rate": 1.4211288347593768e-05, "loss": 0.489, "step": 4846 }, { "epoch": 1.2267780308782588, "grad_norm": 0.15312208235263824, "learning_rate": 1.420911657295648e-05, "loss": 0.4774, "step": 4847 }, { "epoch": 1.2270311313591495, "grad_norm": 0.15998266637325287, "learning_rate": 1.420694455701947e-05, "loss": 0.5264, "step": 4848 }, { "epoch": 1.2272842318400405, "grad_norm": 0.1599723845720291, "learning_rate": 1.4204772299907248e-05, "loss": 0.5206, "step": 4849 }, { "epoch": 1.2275373323209313, "grad_norm": 0.18506330251693726, "learning_rate": 1.4202599801744353e-05, "loss": 0.5216, "step": 4850 }, { "epoch": 1.2277904328018223, "grad_norm": 0.15713363885879517, "learning_rate": 1.4200427062655325e-05, "loss": 0.521, "step": 4851 }, { "epoch": 1.2280435332827133, "grad_norm": 0.16612905263900757, "learning_rate": 1.419825408276472e-05, "loss": 0.5043, "step": 4852 }, { "epoch": 1.228296633763604, "grad_norm": 0.2238139808177948, "learning_rate": 1.4196080862197119e-05, "loss": 0.5106, "step": 4853 }, { "epoch": 1.228549734244495, "grad_norm": 0.15952655673027039, "learning_rate": 1.4193907401077103e-05, "loss": 0.4919, "step": 4854 }, { "epoch": 1.228802834725386, "grad_norm": 0.1556146740913391, "learning_rate": 1.4191733699529268e-05, "loss": 0.4985, "step": 4855 }, { "epoch": 1.2290559352062769, "grad_norm": 0.15565980970859528, "learning_rate": 1.4189559757678235e-05, "loss": 0.5083, "step": 4856 }, { "epoch": 1.2293090356871679, "grad_norm": 0.15723058581352234, "learning_rate": 1.4187385575648627e-05, "loss": 0.4859, "step": 4857 }, { "epoch": 1.2295621361680587, "grad_norm": 0.15739674866199493, "learning_rate": 1.4185211153565086e-05, "loss": 0.5041, "step": 4858 }, { "epoch": 1.2298152366489497, "grad_norm": 0.1638237088918686, "learning_rate": 1.4183036491552267e-05, "loss": 0.4963, "step": 4859 }, { "epoch": 1.2300683371298406, "grad_norm": 0.15486039221286774, "learning_rate": 1.418086158973484e-05, "loss": 0.4873, "step": 4860 }, { "epoch": 1.2303214376107314, "grad_norm": 0.17234180867671967, "learning_rate": 1.4178686448237486e-05, "loss": 0.5064, "step": 4861 }, { "epoch": 1.2305745380916224, "grad_norm": 0.15550485253334045, "learning_rate": 1.4176511067184904e-05, "loss": 0.479, "step": 4862 }, { "epoch": 1.2308276385725132, "grad_norm": 0.16810059547424316, "learning_rate": 1.41743354467018e-05, "loss": 0.4738, "step": 4863 }, { "epoch": 1.2310807390534042, "grad_norm": 0.1560376137495041, "learning_rate": 1.41721595869129e-05, "loss": 0.4879, "step": 4864 }, { "epoch": 1.2313338395342952, "grad_norm": 0.14771954715251923, "learning_rate": 1.416998348794294e-05, "loss": 0.4663, "step": 4865 }, { "epoch": 1.231586940015186, "grad_norm": 0.15605133771896362, "learning_rate": 1.4167807149916672e-05, "loss": 0.502, "step": 4866 }, { "epoch": 1.231840040496077, "grad_norm": 0.15336942672729492, "learning_rate": 1.4165630572958867e-05, "loss": 0.4778, "step": 4867 }, { "epoch": 1.2320931409769678, "grad_norm": 0.16873812675476074, "learning_rate": 1.416345375719429e-05, "loss": 0.5094, "step": 4868 }, { "epoch": 1.2323462414578588, "grad_norm": 0.16564925014972687, "learning_rate": 1.4161276702747744e-05, "loss": 0.4945, "step": 4869 }, { "epoch": 1.2325993419387498, "grad_norm": 0.15213942527770996, "learning_rate": 1.4159099409744033e-05, "loss": 0.487, "step": 4870 }, { "epoch": 1.2328524424196405, "grad_norm": 0.15122611820697784, "learning_rate": 1.4156921878307973e-05, "loss": 0.4857, "step": 4871 }, { "epoch": 1.2331055429005315, "grad_norm": 0.15353399515151978, "learning_rate": 1.4154744108564398e-05, "loss": 0.5029, "step": 4872 }, { "epoch": 1.2333586433814223, "grad_norm": 0.15547573566436768, "learning_rate": 1.415256610063816e-05, "loss": 0.4964, "step": 4873 }, { "epoch": 1.2336117438623133, "grad_norm": 0.15306177735328674, "learning_rate": 1.4150387854654113e-05, "loss": 0.4637, "step": 4874 }, { "epoch": 1.2338648443432043, "grad_norm": 0.1780419945716858, "learning_rate": 1.4148209370737137e-05, "loss": 0.4773, "step": 4875 }, { "epoch": 1.234117944824095, "grad_norm": 0.1591522991657257, "learning_rate": 1.4146030649012116e-05, "loss": 0.5024, "step": 4876 }, { "epoch": 1.234371045304986, "grad_norm": 0.1524720937013626, "learning_rate": 1.4143851689603954e-05, "loss": 0.5018, "step": 4877 }, { "epoch": 1.2346241457858769, "grad_norm": 0.14932334423065186, "learning_rate": 1.4141672492637564e-05, "loss": 0.4752, "step": 4878 }, { "epoch": 1.2348772462667679, "grad_norm": 0.15775595605373383, "learning_rate": 1.4139493058237874e-05, "loss": 0.4668, "step": 4879 }, { "epoch": 1.2351303467476589, "grad_norm": 0.1714020073413849, "learning_rate": 1.413731338652983e-05, "loss": 0.494, "step": 4880 }, { "epoch": 1.2353834472285496, "grad_norm": 0.1518697887659073, "learning_rate": 1.4135133477638386e-05, "loss": 0.5055, "step": 4881 }, { "epoch": 1.2356365477094406, "grad_norm": 0.1498219519853592, "learning_rate": 1.4132953331688508e-05, "loss": 0.4923, "step": 4882 }, { "epoch": 1.2358896481903316, "grad_norm": 0.15687674283981323, "learning_rate": 1.4130772948805184e-05, "loss": 0.4749, "step": 4883 }, { "epoch": 1.2361427486712224, "grad_norm": 0.15354859828948975, "learning_rate": 1.412859232911341e-05, "loss": 0.4897, "step": 4884 }, { "epoch": 1.2363958491521134, "grad_norm": 0.15198339521884918, "learning_rate": 1.4126411472738198e-05, "loss": 0.4951, "step": 4885 }, { "epoch": 1.2366489496330044, "grad_norm": 0.1575637310743332, "learning_rate": 1.4124230379804564e-05, "loss": 0.4963, "step": 4886 }, { "epoch": 1.2369020501138952, "grad_norm": 0.14900118112564087, "learning_rate": 1.4122049050437555e-05, "loss": 0.465, "step": 4887 }, { "epoch": 1.2371551505947862, "grad_norm": 0.16111548244953156, "learning_rate": 1.4119867484762216e-05, "loss": 0.5202, "step": 4888 }, { "epoch": 1.237408251075677, "grad_norm": 0.15612049400806427, "learning_rate": 1.4117685682903613e-05, "loss": 0.5068, "step": 4889 }, { "epoch": 1.237661351556568, "grad_norm": 0.15628957748413086, "learning_rate": 1.4115503644986826e-05, "loss": 0.4807, "step": 4890 }, { "epoch": 1.237914452037459, "grad_norm": 0.1582592874765396, "learning_rate": 1.4113321371136944e-05, "loss": 0.4877, "step": 4891 }, { "epoch": 1.2381675525183498, "grad_norm": 0.16074936091899872, "learning_rate": 1.4111138861479074e-05, "loss": 0.523, "step": 4892 }, { "epoch": 1.2384206529992408, "grad_norm": 0.15410254895687103, "learning_rate": 1.410895611613833e-05, "loss": 0.4949, "step": 4893 }, { "epoch": 1.2386737534801315, "grad_norm": 0.15247602760791779, "learning_rate": 1.4106773135239853e-05, "loss": 0.4859, "step": 4894 }, { "epoch": 1.2389268539610225, "grad_norm": 0.15582488477230072, "learning_rate": 1.4104589918908782e-05, "loss": 0.486, "step": 4895 }, { "epoch": 1.2391799544419135, "grad_norm": 0.1554383784532547, "learning_rate": 1.4102406467270276e-05, "loss": 0.5139, "step": 4896 }, { "epoch": 1.2394330549228043, "grad_norm": 0.15653489530086517, "learning_rate": 1.4100222780449511e-05, "loss": 0.491, "step": 4897 }, { "epoch": 1.2396861554036953, "grad_norm": 0.15589633584022522, "learning_rate": 1.4098038858571674e-05, "loss": 0.5025, "step": 4898 }, { "epoch": 1.239939255884586, "grad_norm": 0.1538195013999939, "learning_rate": 1.4095854701761958e-05, "loss": 0.4843, "step": 4899 }, { "epoch": 1.240192356365477, "grad_norm": 0.14805662631988525, "learning_rate": 1.4093670310145579e-05, "loss": 0.4895, "step": 4900 }, { "epoch": 1.240445456846368, "grad_norm": 0.149564728140831, "learning_rate": 1.4091485683847772e-05, "loss": 0.487, "step": 4901 }, { "epoch": 1.2406985573272589, "grad_norm": 0.15739673376083374, "learning_rate": 1.4089300822993763e-05, "loss": 0.4826, "step": 4902 }, { "epoch": 1.2409516578081499, "grad_norm": 0.1538989394903183, "learning_rate": 1.4087115727708816e-05, "loss": 0.4606, "step": 4903 }, { "epoch": 1.2412047582890406, "grad_norm": 0.15012723207473755, "learning_rate": 1.4084930398118192e-05, "loss": 0.4771, "step": 4904 }, { "epoch": 1.2414578587699316, "grad_norm": 0.16179558634757996, "learning_rate": 1.4082744834347176e-05, "loss": 0.5188, "step": 4905 }, { "epoch": 1.2417109592508226, "grad_norm": 0.15427063405513763, "learning_rate": 1.4080559036521055e-05, "loss": 0.4698, "step": 4906 }, { "epoch": 1.2419640597317134, "grad_norm": 0.15398575365543365, "learning_rate": 1.4078373004765143e-05, "loss": 0.4901, "step": 4907 }, { "epoch": 1.2422171602126044, "grad_norm": 0.16139110922813416, "learning_rate": 1.4076186739204758e-05, "loss": 0.4963, "step": 4908 }, { "epoch": 1.2424702606934952, "grad_norm": 0.16018272936344147, "learning_rate": 1.4074000239965233e-05, "loss": 0.4918, "step": 4909 }, { "epoch": 1.2427233611743862, "grad_norm": 0.159691721200943, "learning_rate": 1.4071813507171916e-05, "loss": 0.4876, "step": 4910 }, { "epoch": 1.2429764616552772, "grad_norm": 0.15758097171783447, "learning_rate": 1.4069626540950168e-05, "loss": 0.4926, "step": 4911 }, { "epoch": 1.243229562136168, "grad_norm": 0.1535804718732834, "learning_rate": 1.4067439341425364e-05, "loss": 0.4887, "step": 4912 }, { "epoch": 1.243482662617059, "grad_norm": 0.1542280614376068, "learning_rate": 1.406525190872289e-05, "loss": 0.5007, "step": 4913 }, { "epoch": 1.24373576309795, "grad_norm": 0.15456826984882355, "learning_rate": 1.4063064242968149e-05, "loss": 0.4859, "step": 4914 }, { "epoch": 1.2439888635788408, "grad_norm": 0.15653766691684723, "learning_rate": 1.4060876344286549e-05, "loss": 0.4891, "step": 4915 }, { "epoch": 1.2442419640597318, "grad_norm": 0.15363764762878418, "learning_rate": 1.4058688212803527e-05, "loss": 0.5066, "step": 4916 }, { "epoch": 1.2444950645406225, "grad_norm": 0.1483721137046814, "learning_rate": 1.4056499848644518e-05, "loss": 0.466, "step": 4917 }, { "epoch": 1.2447481650215135, "grad_norm": 0.15820713341236115, "learning_rate": 1.4054311251934975e-05, "loss": 0.5307, "step": 4918 }, { "epoch": 1.2450012655024045, "grad_norm": 0.1629936397075653, "learning_rate": 1.4052122422800372e-05, "loss": 0.5093, "step": 4919 }, { "epoch": 1.2452543659832953, "grad_norm": 0.15156689286231995, "learning_rate": 1.4049933361366186e-05, "loss": 0.4838, "step": 4920 }, { "epoch": 1.2455074664641863, "grad_norm": 0.16810351610183716, "learning_rate": 1.4047744067757909e-05, "loss": 0.4867, "step": 4921 }, { "epoch": 1.2457605669450773, "grad_norm": 0.15731008350849152, "learning_rate": 1.404555454210105e-05, "loss": 0.5017, "step": 4922 }, { "epoch": 1.246013667425968, "grad_norm": 0.1624636948108673, "learning_rate": 1.4043364784521135e-05, "loss": 0.4793, "step": 4923 }, { "epoch": 1.246266767906859, "grad_norm": 0.1574585735797882, "learning_rate": 1.4041174795143691e-05, "loss": 0.5018, "step": 4924 }, { "epoch": 1.2465198683877499, "grad_norm": 0.15658970177173615, "learning_rate": 1.4038984574094266e-05, "loss": 0.5205, "step": 4925 }, { "epoch": 1.2467729688686409, "grad_norm": 0.15486089885234833, "learning_rate": 1.4036794121498427e-05, "loss": 0.492, "step": 4926 }, { "epoch": 1.2470260693495319, "grad_norm": 0.1567653864622116, "learning_rate": 1.4034603437481742e-05, "loss": 0.4946, "step": 4927 }, { "epoch": 1.2472791698304226, "grad_norm": 0.1566191166639328, "learning_rate": 1.40324125221698e-05, "loss": 0.4861, "step": 4928 }, { "epoch": 1.2475322703113136, "grad_norm": 0.15150129795074463, "learning_rate": 1.4030221375688202e-05, "loss": 0.4653, "step": 4929 }, { "epoch": 1.2477853707922044, "grad_norm": 0.1553422212600708, "learning_rate": 1.4028029998162563e-05, "loss": 0.4889, "step": 4930 }, { "epoch": 1.2480384712730954, "grad_norm": 0.1568761020898819, "learning_rate": 1.4025838389718507e-05, "loss": 0.4569, "step": 4931 }, { "epoch": 1.2482915717539864, "grad_norm": 0.16511127352714539, "learning_rate": 1.4023646550481678e-05, "loss": 0.512, "step": 4932 }, { "epoch": 1.2485446722348772, "grad_norm": 0.15752407908439636, "learning_rate": 1.4021454480577726e-05, "loss": 0.4661, "step": 4933 }, { "epoch": 1.2487977727157682, "grad_norm": 0.15458279848098755, "learning_rate": 1.4019262180132318e-05, "loss": 0.4942, "step": 4934 }, { "epoch": 1.249050873196659, "grad_norm": 0.15776178240776062, "learning_rate": 1.4017069649271137e-05, "loss": 0.4634, "step": 4935 }, { "epoch": 1.24930397367755, "grad_norm": 0.1501118689775467, "learning_rate": 1.4014876888119872e-05, "loss": 0.49, "step": 4936 }, { "epoch": 1.249557074158441, "grad_norm": 0.1500958353281021, "learning_rate": 1.4012683896804233e-05, "loss": 0.4841, "step": 4937 }, { "epoch": 1.2498101746393318, "grad_norm": 0.15772713720798492, "learning_rate": 1.4010490675449939e-05, "loss": 0.488, "step": 4938 }, { "epoch": 1.2500632751202227, "grad_norm": 0.15695112943649292, "learning_rate": 1.4008297224182722e-05, "loss": 0.5043, "step": 4939 }, { "epoch": 1.2503163756011135, "grad_norm": 0.15333375334739685, "learning_rate": 1.4006103543128327e-05, "loss": 0.4816, "step": 4940 }, { "epoch": 1.2505694760820045, "grad_norm": 0.15186414122581482, "learning_rate": 1.4003909632412511e-05, "loss": 0.4907, "step": 4941 }, { "epoch": 1.2508225765628955, "grad_norm": 0.16254602372646332, "learning_rate": 1.4001715492161055e-05, "loss": 0.5058, "step": 4942 }, { "epoch": 1.2510756770437863, "grad_norm": 0.161966010928154, "learning_rate": 1.3999521122499732e-05, "loss": 0.4977, "step": 4943 }, { "epoch": 1.2513287775246773, "grad_norm": 0.15580295026302338, "learning_rate": 1.3997326523554348e-05, "loss": 0.4905, "step": 4944 }, { "epoch": 1.251581878005568, "grad_norm": 0.15178640186786652, "learning_rate": 1.3995131695450718e-05, "loss": 0.4792, "step": 4945 }, { "epoch": 1.251834978486459, "grad_norm": 0.14915382862091064, "learning_rate": 1.399293663831466e-05, "loss": 0.4846, "step": 4946 }, { "epoch": 1.25208807896735, "grad_norm": 0.1563478410243988, "learning_rate": 1.3990741352272013e-05, "loss": 0.5682, "step": 4947 }, { "epoch": 1.252341179448241, "grad_norm": 0.156821146607399, "learning_rate": 1.3988545837448632e-05, "loss": 0.5, "step": 4948 }, { "epoch": 1.2525942799291319, "grad_norm": 0.15283161401748657, "learning_rate": 1.398635009397038e-05, "loss": 0.4689, "step": 4949 }, { "epoch": 1.2528473804100229, "grad_norm": 0.15897133946418762, "learning_rate": 1.3984154121963129e-05, "loss": 0.4936, "step": 4950 }, { "epoch": 1.2531004808909136, "grad_norm": 0.1575917899608612, "learning_rate": 1.3981957921552778e-05, "loss": 0.4911, "step": 4951 }, { "epoch": 1.2533535813718046, "grad_norm": 0.15413157641887665, "learning_rate": 1.3979761492865222e-05, "loss": 0.5114, "step": 4952 }, { "epoch": 1.2536066818526956, "grad_norm": 0.15939849615097046, "learning_rate": 1.3977564836026386e-05, "loss": 0.5102, "step": 4953 }, { "epoch": 1.2538597823335864, "grad_norm": 0.1640951931476593, "learning_rate": 1.397536795116219e-05, "loss": 0.5083, "step": 4954 }, { "epoch": 1.2541128828144774, "grad_norm": 0.15115496516227722, "learning_rate": 1.3973170838398586e-05, "loss": 0.4695, "step": 4955 }, { "epoch": 1.2543659832953682, "grad_norm": 0.15289868414402008, "learning_rate": 1.3970973497861524e-05, "loss": 0.4725, "step": 4956 }, { "epoch": 1.2546190837762592, "grad_norm": 0.15848854184150696, "learning_rate": 1.3968775929676977e-05, "loss": 0.4916, "step": 4957 }, { "epoch": 1.2548721842571502, "grad_norm": 0.15122932195663452, "learning_rate": 1.3966578133970923e-05, "loss": 0.4684, "step": 4958 }, { "epoch": 1.255125284738041, "grad_norm": 0.1590738296508789, "learning_rate": 1.3964380110869357e-05, "loss": 0.5062, "step": 4959 }, { "epoch": 1.255378385218932, "grad_norm": 0.1521679311990738, "learning_rate": 1.3962181860498292e-05, "loss": 0.4736, "step": 4960 }, { "epoch": 1.2556314856998227, "grad_norm": 0.1556379348039627, "learning_rate": 1.3959983382983743e-05, "loss": 0.5006, "step": 4961 }, { "epoch": 1.2558845861807137, "grad_norm": 0.15740276873111725, "learning_rate": 1.395778467845175e-05, "loss": 0.5061, "step": 4962 }, { "epoch": 1.2561376866616047, "grad_norm": 0.15278546512126923, "learning_rate": 1.3955585747028353e-05, "loss": 0.4813, "step": 4963 }, { "epoch": 1.2563907871424955, "grad_norm": 0.15900678932666779, "learning_rate": 1.395338658883962e-05, "loss": 0.5008, "step": 4964 }, { "epoch": 1.2566438876233865, "grad_norm": 0.15666881203651428, "learning_rate": 1.3951187204011618e-05, "loss": 0.475, "step": 4965 }, { "epoch": 1.2568969881042773, "grad_norm": 0.16239850223064423, "learning_rate": 1.3948987592670431e-05, "loss": 0.5037, "step": 4966 }, { "epoch": 1.2571500885851683, "grad_norm": 0.16286076605319977, "learning_rate": 1.3946787754942168e-05, "loss": 0.4937, "step": 4967 }, { "epoch": 1.2574031890660593, "grad_norm": 0.15182296931743622, "learning_rate": 1.3944587690952934e-05, "loss": 0.4865, "step": 4968 }, { "epoch": 1.25765628954695, "grad_norm": 0.15502695739269257, "learning_rate": 1.3942387400828852e-05, "loss": 0.5181, "step": 4969 }, { "epoch": 1.257909390027841, "grad_norm": 0.19935202598571777, "learning_rate": 1.3940186884696068e-05, "loss": 0.505, "step": 4970 }, { "epoch": 1.2581624905087319, "grad_norm": 0.16324858367443085, "learning_rate": 1.3937986142680724e-05, "loss": 0.4972, "step": 4971 }, { "epoch": 1.2584155909896229, "grad_norm": 0.14911402761936188, "learning_rate": 1.3935785174908985e-05, "loss": 0.4957, "step": 4972 }, { "epoch": 1.2586686914705139, "grad_norm": 0.15820351243019104, "learning_rate": 1.3933583981507037e-05, "loss": 0.5119, "step": 4973 }, { "epoch": 1.2589217919514046, "grad_norm": 0.1612289547920227, "learning_rate": 1.3931382562601062e-05, "loss": 0.4711, "step": 4974 }, { "epoch": 1.2591748924322956, "grad_norm": 0.1494307965040207, "learning_rate": 1.3929180918317262e-05, "loss": 0.4855, "step": 4975 }, { "epoch": 1.2594279929131864, "grad_norm": 0.15580785274505615, "learning_rate": 1.3926979048781854e-05, "loss": 0.4939, "step": 4976 }, { "epoch": 1.2596810933940774, "grad_norm": 0.15612474083900452, "learning_rate": 1.3924776954121072e-05, "loss": 0.489, "step": 4977 }, { "epoch": 1.2599341938749684, "grad_norm": 0.1577444076538086, "learning_rate": 1.3922574634461147e-05, "loss": 0.5077, "step": 4978 }, { "epoch": 1.2601872943558594, "grad_norm": 0.16252046823501587, "learning_rate": 1.3920372089928343e-05, "loss": 0.4733, "step": 4979 }, { "epoch": 1.2604403948367502, "grad_norm": 0.15684834122657776, "learning_rate": 1.3918169320648922e-05, "loss": 0.495, "step": 4980 }, { "epoch": 1.260693495317641, "grad_norm": 0.1639450490474701, "learning_rate": 1.3915966326749164e-05, "loss": 0.5361, "step": 4981 }, { "epoch": 1.260946595798532, "grad_norm": 0.15660099685192108, "learning_rate": 1.3913763108355364e-05, "loss": 0.5104, "step": 4982 }, { "epoch": 1.261199696279423, "grad_norm": 0.15481001138687134, "learning_rate": 1.3911559665593828e-05, "loss": 0.5014, "step": 4983 }, { "epoch": 1.261452796760314, "grad_norm": 0.16124440729618073, "learning_rate": 1.3909355998590875e-05, "loss": 0.5143, "step": 4984 }, { "epoch": 1.2617058972412047, "grad_norm": 0.1568736582994461, "learning_rate": 1.390715210747283e-05, "loss": 0.5453, "step": 4985 }, { "epoch": 1.2619589977220957, "grad_norm": 0.16312968730926514, "learning_rate": 1.3904947992366047e-05, "loss": 0.4913, "step": 4986 }, { "epoch": 1.2622120982029865, "grad_norm": 0.16222339868545532, "learning_rate": 1.390274365339688e-05, "loss": 0.4784, "step": 4987 }, { "epoch": 1.2624651986838775, "grad_norm": 0.15709851682186127, "learning_rate": 1.3900539090691694e-05, "loss": 0.4855, "step": 4988 }, { "epoch": 1.2627182991647685, "grad_norm": 0.15653419494628906, "learning_rate": 1.389833430437688e-05, "loss": 0.4688, "step": 4989 }, { "epoch": 1.2629713996456593, "grad_norm": 0.1625477522611618, "learning_rate": 1.3896129294578826e-05, "loss": 0.5133, "step": 4990 }, { "epoch": 1.2632245001265503, "grad_norm": 0.16175898909568787, "learning_rate": 1.3893924061423944e-05, "loss": 0.4838, "step": 4991 }, { "epoch": 1.263477600607441, "grad_norm": 0.15628497302532196, "learning_rate": 1.3891718605038657e-05, "loss": 0.4891, "step": 4992 }, { "epoch": 1.263730701088332, "grad_norm": 0.1590224653482437, "learning_rate": 1.3889512925549399e-05, "loss": 0.4529, "step": 4993 }, { "epoch": 1.263983801569223, "grad_norm": 0.15723086893558502, "learning_rate": 1.3887307023082614e-05, "loss": 0.5041, "step": 4994 }, { "epoch": 1.2642369020501139, "grad_norm": 0.15514899790287018, "learning_rate": 1.3885100897764763e-05, "loss": 0.5099, "step": 4995 }, { "epoch": 1.2644900025310049, "grad_norm": 0.15988989174365997, "learning_rate": 1.3882894549722322e-05, "loss": 0.4878, "step": 4996 }, { "epoch": 1.2647431030118956, "grad_norm": 0.15946030616760254, "learning_rate": 1.3880687979081769e-05, "loss": 0.4892, "step": 4997 }, { "epoch": 1.2649962034927866, "grad_norm": 0.1601671427488327, "learning_rate": 1.3878481185969608e-05, "loss": 0.4656, "step": 4998 }, { "epoch": 1.2652493039736776, "grad_norm": 0.16230936348438263, "learning_rate": 1.3876274170512347e-05, "loss": 0.51, "step": 4999 }, { "epoch": 1.2655024044545684, "grad_norm": 0.1547393500804901, "learning_rate": 1.387406693283651e-05, "loss": 0.4946, "step": 5000 }, { "epoch": 1.2657555049354594, "grad_norm": 0.15427574515342712, "learning_rate": 1.3871859473068634e-05, "loss": 0.4924, "step": 5001 }, { "epoch": 1.2660086054163502, "grad_norm": 0.16172130405902863, "learning_rate": 1.3869651791335267e-05, "loss": 0.4871, "step": 5002 }, { "epoch": 1.2662617058972412, "grad_norm": 0.15750892460346222, "learning_rate": 1.3867443887762972e-05, "loss": 0.4968, "step": 5003 }, { "epoch": 1.2665148063781322, "grad_norm": 0.1572975218296051, "learning_rate": 1.3865235762478323e-05, "loss": 0.4959, "step": 5004 }, { "epoch": 1.266767906859023, "grad_norm": 0.16880685091018677, "learning_rate": 1.3863027415607908e-05, "loss": 0.4717, "step": 5005 }, { "epoch": 1.267021007339914, "grad_norm": 0.15943951904773712, "learning_rate": 1.3860818847278326e-05, "loss": 0.5135, "step": 5006 }, { "epoch": 1.2672741078208047, "grad_norm": 0.15427380800247192, "learning_rate": 1.3858610057616184e-05, "loss": 0.4855, "step": 5007 }, { "epoch": 1.2675272083016957, "grad_norm": 0.15371760725975037, "learning_rate": 1.3856401046748115e-05, "loss": 0.4768, "step": 5008 }, { "epoch": 1.2677803087825867, "grad_norm": 0.15620753169059753, "learning_rate": 1.3854191814800759e-05, "loss": 0.4982, "step": 5009 }, { "epoch": 1.2680334092634775, "grad_norm": 0.1564817726612091, "learning_rate": 1.3851982361900756e-05, "loss": 0.4985, "step": 5010 }, { "epoch": 1.2682865097443685, "grad_norm": 0.15644876658916473, "learning_rate": 1.3849772688174778e-05, "loss": 0.4976, "step": 5011 }, { "epoch": 1.2685396102252593, "grad_norm": 0.16018062829971313, "learning_rate": 1.3847562793749498e-05, "loss": 0.4941, "step": 5012 }, { "epoch": 1.2687927107061503, "grad_norm": 0.15231181681156158, "learning_rate": 1.3845352678751602e-05, "loss": 0.4652, "step": 5013 }, { "epoch": 1.2690458111870413, "grad_norm": 0.15957076847553253, "learning_rate": 1.3843142343307798e-05, "loss": 0.4709, "step": 5014 }, { "epoch": 1.2692989116679323, "grad_norm": 0.15874424576759338, "learning_rate": 1.3840931787544793e-05, "loss": 0.4919, "step": 5015 }, { "epoch": 1.269552012148823, "grad_norm": 0.16100528836250305, "learning_rate": 1.3838721011589317e-05, "loss": 0.5066, "step": 5016 }, { "epoch": 1.269805112629714, "grad_norm": 0.1557178795337677, "learning_rate": 1.383651001556811e-05, "loss": 0.499, "step": 5017 }, { "epoch": 1.2700582131106049, "grad_norm": 0.1559823602437973, "learning_rate": 1.383429879960792e-05, "loss": 0.5082, "step": 5018 }, { "epoch": 1.2703113135914958, "grad_norm": 0.15654444694519043, "learning_rate": 1.3832087363835513e-05, "loss": 0.4903, "step": 5019 }, { "epoch": 1.2705644140723868, "grad_norm": 0.1572248935699463, "learning_rate": 1.3829875708377666e-05, "loss": 0.4847, "step": 5020 }, { "epoch": 1.2708175145532776, "grad_norm": 0.1650071144104004, "learning_rate": 1.3827663833361168e-05, "loss": 0.5018, "step": 5021 }, { "epoch": 1.2710706150341686, "grad_norm": 0.16747570037841797, "learning_rate": 1.3825451738912825e-05, "loss": 0.4984, "step": 5022 }, { "epoch": 1.2713237155150594, "grad_norm": 0.16215550899505615, "learning_rate": 1.3823239425159447e-05, "loss": 0.505, "step": 5023 }, { "epoch": 1.2715768159959504, "grad_norm": 0.16393990814685822, "learning_rate": 1.3821026892227862e-05, "loss": 0.4813, "step": 5024 }, { "epoch": 1.2718299164768414, "grad_norm": 0.15511666238307953, "learning_rate": 1.3818814140244914e-05, "loss": 0.47, "step": 5025 }, { "epoch": 1.2720830169577322, "grad_norm": 0.1569250375032425, "learning_rate": 1.3816601169337446e-05, "loss": 0.4925, "step": 5026 }, { "epoch": 1.2723361174386232, "grad_norm": 0.16024096310138702, "learning_rate": 1.381438797963233e-05, "loss": 0.4881, "step": 5027 }, { "epoch": 1.272589217919514, "grad_norm": 0.15558162331581116, "learning_rate": 1.3812174571256448e-05, "loss": 0.5001, "step": 5028 }, { "epoch": 1.272842318400405, "grad_norm": 0.16200292110443115, "learning_rate": 1.380996094433668e-05, "loss": 0.4902, "step": 5029 }, { "epoch": 1.273095418881296, "grad_norm": 0.1631753146648407, "learning_rate": 1.3807747098999934e-05, "loss": 0.4867, "step": 5030 }, { "epoch": 1.2733485193621867, "grad_norm": 0.16207389533519745, "learning_rate": 1.3805533035373125e-05, "loss": 0.4909, "step": 5031 }, { "epoch": 1.2736016198430777, "grad_norm": 0.1628161519765854, "learning_rate": 1.3803318753583175e-05, "loss": 0.4713, "step": 5032 }, { "epoch": 1.2738547203239685, "grad_norm": 0.1628548949956894, "learning_rate": 1.3801104253757032e-05, "loss": 0.5084, "step": 5033 }, { "epoch": 1.2741078208048595, "grad_norm": 0.1614101678133011, "learning_rate": 1.3798889536021647e-05, "loss": 0.4879, "step": 5034 }, { "epoch": 1.2743609212857505, "grad_norm": 0.15940417349338531, "learning_rate": 1.379667460050398e-05, "loss": 0.4944, "step": 5035 }, { "epoch": 1.2746140217666413, "grad_norm": 0.1509905606508255, "learning_rate": 1.3794459447331014e-05, "loss": 0.4852, "step": 5036 }, { "epoch": 1.2748671222475323, "grad_norm": 0.15168650448322296, "learning_rate": 1.3792244076629738e-05, "loss": 0.4812, "step": 5037 }, { "epoch": 1.275120222728423, "grad_norm": 0.15908123552799225, "learning_rate": 1.379002848852715e-05, "loss": 0.4823, "step": 5038 }, { "epoch": 1.275373323209314, "grad_norm": 0.15745550394058228, "learning_rate": 1.3787812683150268e-05, "loss": 0.4876, "step": 5039 }, { "epoch": 1.275626423690205, "grad_norm": 0.15144307911396027, "learning_rate": 1.3785596660626123e-05, "loss": 0.4857, "step": 5040 }, { "epoch": 1.2758795241710958, "grad_norm": 0.1597142219543457, "learning_rate": 1.378338042108175e-05, "loss": 0.5082, "step": 5041 }, { "epoch": 1.2761326246519868, "grad_norm": 0.1550116389989853, "learning_rate": 1.3781163964644202e-05, "loss": 0.5201, "step": 5042 }, { "epoch": 1.2763857251328776, "grad_norm": 0.15829619765281677, "learning_rate": 1.3778947291440545e-05, "loss": 0.4837, "step": 5043 }, { "epoch": 1.2766388256137686, "grad_norm": 0.15846143662929535, "learning_rate": 1.3776730401597858e-05, "loss": 0.5012, "step": 5044 }, { "epoch": 1.2768919260946596, "grad_norm": 0.15265394747257233, "learning_rate": 1.3774513295243226e-05, "loss": 0.5098, "step": 5045 }, { "epoch": 1.2771450265755506, "grad_norm": 0.14910796284675598, "learning_rate": 1.3772295972503753e-05, "loss": 0.4931, "step": 5046 }, { "epoch": 1.2773981270564414, "grad_norm": 0.15300114452838898, "learning_rate": 1.3770078433506555e-05, "loss": 0.4869, "step": 5047 }, { "epoch": 1.2776512275373324, "grad_norm": 0.1681305468082428, "learning_rate": 1.376786067837876e-05, "loss": 0.485, "step": 5048 }, { "epoch": 1.2779043280182232, "grad_norm": 0.15471267700195312, "learning_rate": 1.3765642707247502e-05, "loss": 0.4892, "step": 5049 }, { "epoch": 1.2781574284991142, "grad_norm": 0.1528155356645584, "learning_rate": 1.3763424520239938e-05, "loss": 0.4648, "step": 5050 }, { "epoch": 1.2784105289800052, "grad_norm": 0.16086360812187195, "learning_rate": 1.3761206117483225e-05, "loss": 0.5158, "step": 5051 }, { "epoch": 1.278663629460896, "grad_norm": 0.15786206722259521, "learning_rate": 1.3758987499104546e-05, "loss": 0.4873, "step": 5052 }, { "epoch": 1.278916729941787, "grad_norm": 0.15750518441200256, "learning_rate": 1.375676866523109e-05, "loss": 0.4841, "step": 5053 }, { "epoch": 1.2791698304226777, "grad_norm": 0.1578463762998581, "learning_rate": 1.3754549615990051e-05, "loss": 0.5109, "step": 5054 }, { "epoch": 1.2794229309035687, "grad_norm": 0.15508858859539032, "learning_rate": 1.3752330351508648e-05, "loss": 0.5153, "step": 5055 }, { "epoch": 1.2796760313844597, "grad_norm": 0.1580805629491806, "learning_rate": 1.3750110871914108e-05, "loss": 0.5016, "step": 5056 }, { "epoch": 1.2799291318653505, "grad_norm": 0.15405960381031036, "learning_rate": 1.3747891177333666e-05, "loss": 0.4875, "step": 5057 }, { "epoch": 1.2801822323462415, "grad_norm": 0.15447300672531128, "learning_rate": 1.3745671267894571e-05, "loss": 0.489, "step": 5058 }, { "epoch": 1.2804353328271323, "grad_norm": 0.15518219769001007, "learning_rate": 1.3743451143724089e-05, "loss": 0.5099, "step": 5059 }, { "epoch": 1.2806884333080233, "grad_norm": 0.1583220660686493, "learning_rate": 1.3741230804949494e-05, "loss": 0.4706, "step": 5060 }, { "epoch": 1.2809415337889143, "grad_norm": 0.1561814397573471, "learning_rate": 1.3739010251698072e-05, "loss": 0.4772, "step": 5061 }, { "epoch": 1.281194634269805, "grad_norm": 0.16096830368041992, "learning_rate": 1.3736789484097126e-05, "loss": 0.4783, "step": 5062 }, { "epoch": 1.281447734750696, "grad_norm": 0.15150035917758942, "learning_rate": 1.3734568502273961e-05, "loss": 0.4705, "step": 5063 }, { "epoch": 1.2817008352315868, "grad_norm": 0.15861433744430542, "learning_rate": 1.3732347306355909e-05, "loss": 0.5211, "step": 5064 }, { "epoch": 1.2819539357124778, "grad_norm": 0.15420398116111755, "learning_rate": 1.3730125896470304e-05, "loss": 0.4868, "step": 5065 }, { "epoch": 1.2822070361933688, "grad_norm": 0.1572595089673996, "learning_rate": 1.3727904272744493e-05, "loss": 0.4952, "step": 5066 }, { "epoch": 1.2824601366742596, "grad_norm": 0.16153495013713837, "learning_rate": 1.3725682435305836e-05, "loss": 0.4833, "step": 5067 }, { "epoch": 1.2827132371551506, "grad_norm": 0.1489262878894806, "learning_rate": 1.3723460384281708e-05, "loss": 0.4503, "step": 5068 }, { "epoch": 1.2829663376360414, "grad_norm": 0.16265422105789185, "learning_rate": 1.3721238119799496e-05, "loss": 0.4906, "step": 5069 }, { "epoch": 1.2832194381169324, "grad_norm": 0.16378578543663025, "learning_rate": 1.3719015641986597e-05, "loss": 0.481, "step": 5070 }, { "epoch": 1.2834725385978234, "grad_norm": 0.15731146931648254, "learning_rate": 1.3716792950970419e-05, "loss": 0.4985, "step": 5071 }, { "epoch": 1.2837256390787142, "grad_norm": 0.15722383558750153, "learning_rate": 1.3714570046878387e-05, "loss": 0.4886, "step": 5072 }, { "epoch": 1.2839787395596052, "grad_norm": 0.15871192514896393, "learning_rate": 1.371234692983793e-05, "loss": 0.4978, "step": 5073 }, { "epoch": 1.284231840040496, "grad_norm": 0.15428891777992249, "learning_rate": 1.3710123599976502e-05, "loss": 0.5281, "step": 5074 }, { "epoch": 1.284484940521387, "grad_norm": 0.16274142265319824, "learning_rate": 1.3707900057421557e-05, "loss": 0.5179, "step": 5075 }, { "epoch": 1.284738041002278, "grad_norm": 0.16279642283916473, "learning_rate": 1.3705676302300566e-05, "loss": 0.4678, "step": 5076 }, { "epoch": 1.284991141483169, "grad_norm": 0.15746288001537323, "learning_rate": 1.3703452334741013e-05, "loss": 0.5092, "step": 5077 }, { "epoch": 1.2852442419640597, "grad_norm": 0.1592693328857422, "learning_rate": 1.3701228154870396e-05, "loss": 0.4747, "step": 5078 }, { "epoch": 1.2854973424449505, "grad_norm": 0.16481532156467438, "learning_rate": 1.3699003762816216e-05, "loss": 0.4837, "step": 5079 }, { "epoch": 1.2857504429258415, "grad_norm": 0.15889596939086914, "learning_rate": 1.3696779158706e-05, "loss": 0.4686, "step": 5080 }, { "epoch": 1.2860035434067325, "grad_norm": 0.15593771636486053, "learning_rate": 1.3694554342667279e-05, "loss": 0.4781, "step": 5081 }, { "epoch": 1.2862566438876235, "grad_norm": 0.1638403981924057, "learning_rate": 1.369232931482759e-05, "loss": 0.5046, "step": 5082 }, { "epoch": 1.2865097443685143, "grad_norm": 0.1569722592830658, "learning_rate": 1.3690104075314494e-05, "loss": 0.5029, "step": 5083 }, { "epoch": 1.2867628448494053, "grad_norm": 0.15542711317539215, "learning_rate": 1.3687878624255562e-05, "loss": 0.509, "step": 5084 }, { "epoch": 1.287015945330296, "grad_norm": 0.16561990976333618, "learning_rate": 1.3685652961778368e-05, "loss": 0.4844, "step": 5085 }, { "epoch": 1.287269045811187, "grad_norm": 0.15400412678718567, "learning_rate": 1.368342708801051e-05, "loss": 0.4691, "step": 5086 }, { "epoch": 1.287522146292078, "grad_norm": 0.16069866716861725, "learning_rate": 1.3681201003079591e-05, "loss": 0.4873, "step": 5087 }, { "epoch": 1.2877752467729688, "grad_norm": 0.15552347898483276, "learning_rate": 1.3678974707113226e-05, "loss": 0.4788, "step": 5088 }, { "epoch": 1.2880283472538598, "grad_norm": 0.15616215765476227, "learning_rate": 1.3676748200239045e-05, "loss": 0.4938, "step": 5089 }, { "epoch": 1.2882814477347506, "grad_norm": 0.1586655080318451, "learning_rate": 1.3674521482584689e-05, "loss": 0.4791, "step": 5090 }, { "epoch": 1.2885345482156416, "grad_norm": 0.16597898304462433, "learning_rate": 1.367229455427781e-05, "loss": 0.5036, "step": 5091 }, { "epoch": 1.2887876486965326, "grad_norm": 0.15865552425384521, "learning_rate": 1.3670067415446075e-05, "loss": 0.5216, "step": 5092 }, { "epoch": 1.2890407491774234, "grad_norm": 0.1526980996131897, "learning_rate": 1.3667840066217162e-05, "loss": 0.4971, "step": 5093 }, { "epoch": 1.2892938496583144, "grad_norm": 0.15838992595672607, "learning_rate": 1.3665612506718754e-05, "loss": 0.5187, "step": 5094 }, { "epoch": 1.2895469501392052, "grad_norm": 0.15658220648765564, "learning_rate": 1.366338473707856e-05, "loss": 0.4909, "step": 5095 }, { "epoch": 1.2898000506200962, "grad_norm": 0.15619255602359772, "learning_rate": 1.366115675742429e-05, "loss": 0.4721, "step": 5096 }, { "epoch": 1.2900531511009872, "grad_norm": 0.15553420782089233, "learning_rate": 1.3658928567883672e-05, "loss": 0.5078, "step": 5097 }, { "epoch": 1.290306251581878, "grad_norm": 0.15194444358348846, "learning_rate": 1.3656700168584433e-05, "loss": 0.4764, "step": 5098 }, { "epoch": 1.290559352062769, "grad_norm": 0.15691937506198883, "learning_rate": 1.3654471559654336e-05, "loss": 0.5059, "step": 5099 }, { "epoch": 1.2908124525436597, "grad_norm": 0.15746569633483887, "learning_rate": 1.3652242741221135e-05, "loss": 0.4967, "step": 5100 }, { "epoch": 1.2910655530245507, "grad_norm": 0.156978577375412, "learning_rate": 1.3650013713412606e-05, "loss": 0.4981, "step": 5101 }, { "epoch": 1.2913186535054417, "grad_norm": 0.15610428154468536, "learning_rate": 1.364778447635653e-05, "loss": 0.4995, "step": 5102 }, { "epoch": 1.2915717539863325, "grad_norm": 0.15640105307102203, "learning_rate": 1.3645555030180711e-05, "loss": 0.5159, "step": 5103 }, { "epoch": 1.2918248544672235, "grad_norm": 0.1541210561990738, "learning_rate": 1.3643325375012955e-05, "loss": 0.4974, "step": 5104 }, { "epoch": 1.2920779549481143, "grad_norm": 0.15650223195552826, "learning_rate": 1.3641095510981082e-05, "loss": 0.4933, "step": 5105 }, { "epoch": 1.2923310554290053, "grad_norm": 0.16193224489688873, "learning_rate": 1.3638865438212929e-05, "loss": 0.4846, "step": 5106 }, { "epoch": 1.2925841559098963, "grad_norm": 0.15657363831996918, "learning_rate": 1.3636635156836338e-05, "loss": 0.5137, "step": 5107 }, { "epoch": 1.292837256390787, "grad_norm": 0.1572469025850296, "learning_rate": 1.3634404666979166e-05, "loss": 0.4764, "step": 5108 }, { "epoch": 1.293090356871678, "grad_norm": 0.16133861243724823, "learning_rate": 1.3632173968769287e-05, "loss": 0.4666, "step": 5109 }, { "epoch": 1.2933434573525688, "grad_norm": 0.15103675425052643, "learning_rate": 1.3629943062334578e-05, "loss": 0.4669, "step": 5110 }, { "epoch": 1.2935965578334598, "grad_norm": 0.1555064469575882, "learning_rate": 1.3627711947802932e-05, "loss": 0.479, "step": 5111 }, { "epoch": 1.2938496583143508, "grad_norm": 0.15608786046504974, "learning_rate": 1.3625480625302257e-05, "loss": 0.4896, "step": 5112 }, { "epoch": 1.2941027587952418, "grad_norm": 0.16090935468673706, "learning_rate": 1.3623249094960467e-05, "loss": 0.467, "step": 5113 }, { "epoch": 1.2943558592761326, "grad_norm": 0.16140180826187134, "learning_rate": 1.3621017356905495e-05, "loss": 0.4913, "step": 5114 }, { "epoch": 1.2946089597570236, "grad_norm": 0.1554451584815979, "learning_rate": 1.3618785411265277e-05, "loss": 0.4725, "step": 5115 }, { "epoch": 1.2948620602379144, "grad_norm": 0.16133852303028107, "learning_rate": 1.3616553258167766e-05, "loss": 0.5105, "step": 5116 }, { "epoch": 1.2951151607188054, "grad_norm": 0.15572699904441833, "learning_rate": 1.361432089774093e-05, "loss": 0.4857, "step": 5117 }, { "epoch": 1.2953682611996964, "grad_norm": 0.15809126198291779, "learning_rate": 1.3612088330112743e-05, "loss": 0.4968, "step": 5118 }, { "epoch": 1.2956213616805872, "grad_norm": 0.15514202415943146, "learning_rate": 1.3609855555411193e-05, "loss": 0.5096, "step": 5119 }, { "epoch": 1.2958744621614782, "grad_norm": 0.1579432487487793, "learning_rate": 1.3607622573764286e-05, "loss": 0.4811, "step": 5120 }, { "epoch": 1.296127562642369, "grad_norm": 0.15953850746154785, "learning_rate": 1.3605389385300026e-05, "loss": 0.4952, "step": 5121 }, { "epoch": 1.29638066312326, "grad_norm": 0.15248605608940125, "learning_rate": 1.3603155990146442e-05, "loss": 0.4902, "step": 5122 }, { "epoch": 1.296633763604151, "grad_norm": 0.16017501056194305, "learning_rate": 1.3600922388431568e-05, "loss": 0.5185, "step": 5123 }, { "epoch": 1.2968868640850417, "grad_norm": 0.1516309529542923, "learning_rate": 1.3598688580283447e-05, "loss": 0.4601, "step": 5124 }, { "epoch": 1.2971399645659327, "grad_norm": 0.15429575741291046, "learning_rate": 1.359645456583015e-05, "loss": 0.4717, "step": 5125 }, { "epoch": 1.2973930650468235, "grad_norm": 0.15802225470542908, "learning_rate": 1.359422034519974e-05, "loss": 0.4917, "step": 5126 }, { "epoch": 1.2976461655277145, "grad_norm": 0.16387757658958435, "learning_rate": 1.3591985918520299e-05, "loss": 0.4843, "step": 5127 }, { "epoch": 1.2978992660086055, "grad_norm": 0.1632823348045349, "learning_rate": 1.358975128591993e-05, "loss": 0.4995, "step": 5128 }, { "epoch": 1.2981523664894963, "grad_norm": 0.1687355488538742, "learning_rate": 1.3587516447526729e-05, "loss": 0.4852, "step": 5129 }, { "epoch": 1.2984054669703873, "grad_norm": 0.15707646310329437, "learning_rate": 1.3585281403468821e-05, "loss": 0.5154, "step": 5130 }, { "epoch": 1.298658567451278, "grad_norm": 0.1586480438709259, "learning_rate": 1.3583046153874339e-05, "loss": 0.4963, "step": 5131 }, { "epoch": 1.298911667932169, "grad_norm": 0.15734417736530304, "learning_rate": 1.358081069887142e-05, "loss": 0.4919, "step": 5132 }, { "epoch": 1.29916476841306, "grad_norm": 0.15976615250110626, "learning_rate": 1.357857503858822e-05, "loss": 0.4916, "step": 5133 }, { "epoch": 1.2994178688939508, "grad_norm": 0.15272372961044312, "learning_rate": 1.3576339173152902e-05, "loss": 0.5005, "step": 5134 }, { "epoch": 1.2996709693748418, "grad_norm": 0.15514419972896576, "learning_rate": 1.3574103102693645e-05, "loss": 0.4841, "step": 5135 }, { "epoch": 1.2999240698557326, "grad_norm": 0.15154671669006348, "learning_rate": 1.3571866827338642e-05, "loss": 0.4596, "step": 5136 }, { "epoch": 1.3001771703366236, "grad_norm": 0.15758013725280762, "learning_rate": 1.3569630347216088e-05, "loss": 0.5001, "step": 5137 }, { "epoch": 1.3004302708175146, "grad_norm": 0.15581296384334564, "learning_rate": 1.3567393662454198e-05, "loss": 0.4995, "step": 5138 }, { "epoch": 1.3006833712984054, "grad_norm": 0.16015547513961792, "learning_rate": 1.3565156773181198e-05, "loss": 0.5061, "step": 5139 }, { "epoch": 1.3009364717792964, "grad_norm": 0.1583743691444397, "learning_rate": 1.3562919679525321e-05, "loss": 0.509, "step": 5140 }, { "epoch": 1.3011895722601872, "grad_norm": 0.16186071932315826, "learning_rate": 1.3560682381614819e-05, "loss": 0.5057, "step": 5141 }, { "epoch": 1.3014426727410782, "grad_norm": 0.16262556612491608, "learning_rate": 1.3558444879577948e-05, "loss": 0.4939, "step": 5142 }, { "epoch": 1.3016957732219692, "grad_norm": 0.16120904684066772, "learning_rate": 1.3556207173542978e-05, "loss": 0.5198, "step": 5143 }, { "epoch": 1.3019488737028602, "grad_norm": 0.15991666913032532, "learning_rate": 1.3553969263638198e-05, "loss": 0.4914, "step": 5144 }, { "epoch": 1.302201974183751, "grad_norm": 0.15929840505123138, "learning_rate": 1.35517311499919e-05, "loss": 0.4923, "step": 5145 }, { "epoch": 1.302455074664642, "grad_norm": 0.1553080677986145, "learning_rate": 1.3549492832732384e-05, "loss": 0.4744, "step": 5146 }, { "epoch": 1.3027081751455327, "grad_norm": 0.1583869904279709, "learning_rate": 1.3547254311987978e-05, "loss": 0.4967, "step": 5147 }, { "epoch": 1.3029612756264237, "grad_norm": 0.16517053544521332, "learning_rate": 1.3545015587887004e-05, "loss": 0.508, "step": 5148 }, { "epoch": 1.3032143761073147, "grad_norm": 0.16748790442943573, "learning_rate": 1.3542776660557807e-05, "loss": 0.4903, "step": 5149 }, { "epoch": 1.3034674765882055, "grad_norm": 0.1537647694349289, "learning_rate": 1.3540537530128741e-05, "loss": 0.4657, "step": 5150 }, { "epoch": 1.3037205770690965, "grad_norm": 0.15391764044761658, "learning_rate": 1.3538298196728166e-05, "loss": 0.4798, "step": 5151 }, { "epoch": 1.3039736775499873, "grad_norm": 0.16029280424118042, "learning_rate": 1.3536058660484462e-05, "loss": 0.5002, "step": 5152 }, { "epoch": 1.3042267780308783, "grad_norm": 0.15472877025604248, "learning_rate": 1.3533818921526017e-05, "loss": 0.5009, "step": 5153 }, { "epoch": 1.3044798785117693, "grad_norm": 0.15711376070976257, "learning_rate": 1.3531578979981229e-05, "loss": 0.4836, "step": 5154 }, { "epoch": 1.30473297899266, "grad_norm": 0.15769673883914948, "learning_rate": 1.3529338835978507e-05, "loss": 0.5214, "step": 5155 }, { "epoch": 1.304986079473551, "grad_norm": 0.17491064965724945, "learning_rate": 1.3527098489646278e-05, "loss": 0.5148, "step": 5156 }, { "epoch": 1.3052391799544418, "grad_norm": 0.15654605627059937, "learning_rate": 1.3524857941112974e-05, "loss": 0.4851, "step": 5157 }, { "epoch": 1.3054922804353328, "grad_norm": 0.16803689301013947, "learning_rate": 1.3522617190507042e-05, "loss": 0.488, "step": 5158 }, { "epoch": 1.3057453809162238, "grad_norm": 0.1541115939617157, "learning_rate": 1.3520376237956938e-05, "loss": 0.4924, "step": 5159 }, { "epoch": 1.3059984813971146, "grad_norm": 0.1570245325565338, "learning_rate": 1.3518135083591131e-05, "loss": 0.5013, "step": 5160 }, { "epoch": 1.3062515818780056, "grad_norm": 0.15454532206058502, "learning_rate": 1.3515893727538103e-05, "loss": 0.4914, "step": 5161 }, { "epoch": 1.3065046823588964, "grad_norm": 0.16053378582000732, "learning_rate": 1.3513652169926349e-05, "loss": 0.4826, "step": 5162 }, { "epoch": 1.3067577828397874, "grad_norm": 0.15660127997398376, "learning_rate": 1.3511410410884364e-05, "loss": 0.4847, "step": 5163 }, { "epoch": 1.3070108833206784, "grad_norm": 0.15697096288204193, "learning_rate": 1.3509168450540672e-05, "loss": 0.487, "step": 5164 }, { "epoch": 1.3072639838015692, "grad_norm": 0.15342137217521667, "learning_rate": 1.3506926289023797e-05, "loss": 0.4706, "step": 5165 }, { "epoch": 1.3075170842824602, "grad_norm": 0.16706112027168274, "learning_rate": 1.3504683926462274e-05, "loss": 0.4892, "step": 5166 }, { "epoch": 1.307770184763351, "grad_norm": 0.15388056635856628, "learning_rate": 1.350244136298466e-05, "loss": 0.4897, "step": 5167 }, { "epoch": 1.308023285244242, "grad_norm": 0.15066739916801453, "learning_rate": 1.3500198598719507e-05, "loss": 0.452, "step": 5168 }, { "epoch": 1.308276385725133, "grad_norm": 0.15664394199848175, "learning_rate": 1.3497955633795398e-05, "loss": 0.4917, "step": 5169 }, { "epoch": 1.3085294862060237, "grad_norm": 0.16529138386249542, "learning_rate": 1.3495712468340911e-05, "loss": 0.4876, "step": 5170 }, { "epoch": 1.3087825866869147, "grad_norm": 0.1654326468706131, "learning_rate": 1.349346910248464e-05, "loss": 0.4862, "step": 5171 }, { "epoch": 1.3090356871678055, "grad_norm": 0.1610490381717682, "learning_rate": 1.3491225536355202e-05, "loss": 0.4942, "step": 5172 }, { "epoch": 1.3092887876486965, "grad_norm": 0.1537841409444809, "learning_rate": 1.3488981770081207e-05, "loss": 0.4825, "step": 5173 }, { "epoch": 1.3095418881295875, "grad_norm": 0.15767480432987213, "learning_rate": 1.3486737803791284e-05, "loss": 0.4768, "step": 5174 }, { "epoch": 1.3097949886104785, "grad_norm": 0.15579943358898163, "learning_rate": 1.3484493637614085e-05, "loss": 0.5, "step": 5175 }, { "epoch": 1.3100480890913693, "grad_norm": 0.1566162109375, "learning_rate": 1.3482249271678254e-05, "loss": 0.5028, "step": 5176 }, { "epoch": 1.31030118957226, "grad_norm": 0.15727785229682922, "learning_rate": 1.348000470611246e-05, "loss": 0.5222, "step": 5177 }, { "epoch": 1.310554290053151, "grad_norm": 0.15919847786426544, "learning_rate": 1.347775994104538e-05, "loss": 0.5192, "step": 5178 }, { "epoch": 1.310807390534042, "grad_norm": 0.1647881120443344, "learning_rate": 1.3475514976605695e-05, "loss": 0.5245, "step": 5179 }, { "epoch": 1.311060491014933, "grad_norm": 0.15662293136119843, "learning_rate": 1.3473269812922111e-05, "loss": 0.4808, "step": 5180 }, { "epoch": 1.3113135914958238, "grad_norm": 0.1578540951013565, "learning_rate": 1.3471024450123338e-05, "loss": 0.4915, "step": 5181 }, { "epoch": 1.3115666919767148, "grad_norm": 0.15600253641605377, "learning_rate": 1.3468778888338094e-05, "loss": 0.5116, "step": 5182 }, { "epoch": 1.3118197924576056, "grad_norm": 0.15762686729431152, "learning_rate": 1.3466533127695114e-05, "loss": 0.4792, "step": 5183 }, { "epoch": 1.3120728929384966, "grad_norm": 0.15500959753990173, "learning_rate": 1.3464287168323145e-05, "loss": 0.5029, "step": 5184 }, { "epoch": 1.3123259934193876, "grad_norm": 0.15734221041202545, "learning_rate": 1.3462041010350939e-05, "loss": 0.4932, "step": 5185 }, { "epoch": 1.3125790939002784, "grad_norm": 0.1531873196363449, "learning_rate": 1.345979465390727e-05, "loss": 0.4898, "step": 5186 }, { "epoch": 1.3128321943811694, "grad_norm": 0.15776443481445312, "learning_rate": 1.3457548099120908e-05, "loss": 0.5162, "step": 5187 }, { "epoch": 1.3130852948620602, "grad_norm": 0.15730629861354828, "learning_rate": 1.345530134612065e-05, "loss": 0.5073, "step": 5188 }, { "epoch": 1.3133383953429512, "grad_norm": 0.15981771051883698, "learning_rate": 1.3453054395035297e-05, "loss": 0.4862, "step": 5189 }, { "epoch": 1.3135914958238422, "grad_norm": 0.15618671476840973, "learning_rate": 1.3450807245993656e-05, "loss": 0.4953, "step": 5190 }, { "epoch": 1.313844596304733, "grad_norm": 0.1637888103723526, "learning_rate": 1.3448559899124559e-05, "loss": 0.4559, "step": 5191 }, { "epoch": 1.314097696785624, "grad_norm": 0.15773461759090424, "learning_rate": 1.3446312354556838e-05, "loss": 0.4788, "step": 5192 }, { "epoch": 1.3143507972665147, "grad_norm": 0.16102901101112366, "learning_rate": 1.3444064612419338e-05, "loss": 0.4871, "step": 5193 }, { "epoch": 1.3146038977474057, "grad_norm": 0.21338343620300293, "learning_rate": 1.3441816672840921e-05, "loss": 0.4924, "step": 5194 }, { "epoch": 1.3148569982282967, "grad_norm": 0.1593499630689621, "learning_rate": 1.3439568535950456e-05, "loss": 0.4783, "step": 5195 }, { "epoch": 1.3151100987091875, "grad_norm": 0.15978440642356873, "learning_rate": 1.343732020187682e-05, "loss": 0.5241, "step": 5196 }, { "epoch": 1.3153631991900785, "grad_norm": 0.1582406759262085, "learning_rate": 1.3435071670748911e-05, "loss": 0.4904, "step": 5197 }, { "epoch": 1.3156162996709693, "grad_norm": 0.16386595368385315, "learning_rate": 1.3432822942695628e-05, "loss": 0.5079, "step": 5198 }, { "epoch": 1.3158694001518603, "grad_norm": 0.15726791322231293, "learning_rate": 1.343057401784589e-05, "loss": 0.4708, "step": 5199 }, { "epoch": 1.3161225006327513, "grad_norm": 0.1593991369009018, "learning_rate": 1.3428324896328619e-05, "loss": 0.4914, "step": 5200 }, { "epoch": 1.316375601113642, "grad_norm": 0.15335802733898163, "learning_rate": 1.3426075578272752e-05, "loss": 0.4668, "step": 5201 }, { "epoch": 1.316628701594533, "grad_norm": 0.15460149943828583, "learning_rate": 1.3423826063807242e-05, "loss": 0.5099, "step": 5202 }, { "epoch": 1.3168818020754238, "grad_norm": 0.15627674758434296, "learning_rate": 1.3421576353061045e-05, "loss": 0.4964, "step": 5203 }, { "epoch": 1.3171349025563148, "grad_norm": 0.16076211631298065, "learning_rate": 1.3419326446163136e-05, "loss": 0.4946, "step": 5204 }, { "epoch": 1.3173880030372058, "grad_norm": 0.1627110093832016, "learning_rate": 1.3417076343242494e-05, "loss": 0.5126, "step": 5205 }, { "epoch": 1.3176411035180966, "grad_norm": 0.16545632481575012, "learning_rate": 1.3414826044428113e-05, "loss": 0.4985, "step": 5206 }, { "epoch": 1.3178942039989876, "grad_norm": 0.1503448337316513, "learning_rate": 1.3412575549849001e-05, "loss": 0.4597, "step": 5207 }, { "epoch": 1.3181473044798784, "grad_norm": 0.16493690013885498, "learning_rate": 1.341032485963417e-05, "loss": 0.5026, "step": 5208 }, { "epoch": 1.3184004049607694, "grad_norm": 0.15588316321372986, "learning_rate": 1.3408073973912647e-05, "loss": 0.4708, "step": 5209 }, { "epoch": 1.3186535054416604, "grad_norm": 0.1588958352804184, "learning_rate": 1.3405822892813474e-05, "loss": 0.465, "step": 5210 }, { "epoch": 1.3189066059225514, "grad_norm": 0.16047261655330658, "learning_rate": 1.34035716164657e-05, "loss": 0.5023, "step": 5211 }, { "epoch": 1.3191597064034422, "grad_norm": 0.15204760432243347, "learning_rate": 1.3401320144998384e-05, "loss": 0.4465, "step": 5212 }, { "epoch": 1.3194128068843332, "grad_norm": 0.15873755514621735, "learning_rate": 1.33990684785406e-05, "loss": 0.4823, "step": 5213 }, { "epoch": 1.319665907365224, "grad_norm": 0.1593557745218277, "learning_rate": 1.339681661722143e-05, "loss": 0.4771, "step": 5214 }, { "epoch": 1.319919007846115, "grad_norm": 0.1529778093099594, "learning_rate": 1.3394564561169967e-05, "loss": 0.471, "step": 5215 }, { "epoch": 1.320172108327006, "grad_norm": 0.15859103202819824, "learning_rate": 1.3392312310515321e-05, "loss": 0.5044, "step": 5216 }, { "epoch": 1.3204252088078967, "grad_norm": 0.1542326956987381, "learning_rate": 1.3390059865386607e-05, "loss": 0.4905, "step": 5217 }, { "epoch": 1.3206783092887877, "grad_norm": 0.16020993888378143, "learning_rate": 1.3387807225912948e-05, "loss": 0.4969, "step": 5218 }, { "epoch": 1.3209314097696785, "grad_norm": 0.15540850162506104, "learning_rate": 1.338555439222349e-05, "loss": 0.4812, "step": 5219 }, { "epoch": 1.3211845102505695, "grad_norm": 0.15907101333141327, "learning_rate": 1.3383301364447381e-05, "loss": 0.4904, "step": 5220 }, { "epoch": 1.3214376107314605, "grad_norm": 0.1611395627260208, "learning_rate": 1.338104814271378e-05, "loss": 0.4692, "step": 5221 }, { "epoch": 1.3216907112123513, "grad_norm": 0.15271660685539246, "learning_rate": 1.3378794727151861e-05, "loss": 0.4911, "step": 5222 }, { "epoch": 1.3219438116932423, "grad_norm": 0.16109374165534973, "learning_rate": 1.3376541117890806e-05, "loss": 0.4908, "step": 5223 }, { "epoch": 1.322196912174133, "grad_norm": 0.15832190215587616, "learning_rate": 1.3374287315059813e-05, "loss": 0.496, "step": 5224 }, { "epoch": 1.322450012655024, "grad_norm": 0.1574300080537796, "learning_rate": 1.3372033318788087e-05, "loss": 0.5096, "step": 5225 }, { "epoch": 1.322703113135915, "grad_norm": 0.15230083465576172, "learning_rate": 1.3369779129204843e-05, "loss": 0.4757, "step": 5226 }, { "epoch": 1.3229562136168058, "grad_norm": 0.1605527698993683, "learning_rate": 1.3367524746439307e-05, "loss": 0.4794, "step": 5227 }, { "epoch": 1.3232093140976968, "grad_norm": 0.15768930315971375, "learning_rate": 1.3365270170620723e-05, "loss": 0.4587, "step": 5228 }, { "epoch": 1.3234624145785876, "grad_norm": 0.16419273614883423, "learning_rate": 1.336301540187834e-05, "loss": 0.4696, "step": 5229 }, { "epoch": 1.3237155150594786, "grad_norm": 0.1630331426858902, "learning_rate": 1.3360760440341416e-05, "loss": 0.4714, "step": 5230 }, { "epoch": 1.3239686155403696, "grad_norm": 0.16223140060901642, "learning_rate": 1.3358505286139225e-05, "loss": 0.5054, "step": 5231 }, { "epoch": 1.3242217160212604, "grad_norm": 0.1545623391866684, "learning_rate": 1.335624993940105e-05, "loss": 0.4824, "step": 5232 }, { "epoch": 1.3244748165021514, "grad_norm": 0.15743176639080048, "learning_rate": 1.3353994400256186e-05, "loss": 0.4854, "step": 5233 }, { "epoch": 1.3247279169830422, "grad_norm": 0.155655175447464, "learning_rate": 1.3351738668833937e-05, "loss": 0.4821, "step": 5234 }, { "epoch": 1.3249810174639332, "grad_norm": 0.1678469181060791, "learning_rate": 1.334948274526362e-05, "loss": 0.4868, "step": 5235 }, { "epoch": 1.3252341179448242, "grad_norm": 0.1536780744791031, "learning_rate": 1.3347226629674563e-05, "loss": 0.4831, "step": 5236 }, { "epoch": 1.325487218425715, "grad_norm": 0.1610526740550995, "learning_rate": 1.3344970322196103e-05, "loss": 0.4993, "step": 5237 }, { "epoch": 1.325740318906606, "grad_norm": 0.1580774486064911, "learning_rate": 1.334271382295759e-05, "loss": 0.5044, "step": 5238 }, { "epoch": 1.3259934193874967, "grad_norm": 0.1497715562582016, "learning_rate": 1.3340457132088387e-05, "loss": 0.4721, "step": 5239 }, { "epoch": 1.3262465198683877, "grad_norm": 0.15940026938915253, "learning_rate": 1.3338200249717856e-05, "loss": 0.5016, "step": 5240 }, { "epoch": 1.3264996203492787, "grad_norm": 0.1635264754295349, "learning_rate": 1.333594317597539e-05, "loss": 0.4924, "step": 5241 }, { "epoch": 1.3267527208301697, "grad_norm": 0.17990398406982422, "learning_rate": 1.3333685910990379e-05, "loss": 0.5016, "step": 5242 }, { "epoch": 1.3270058213110605, "grad_norm": 0.16434642672538757, "learning_rate": 1.3331428454892228e-05, "loss": 0.5024, "step": 5243 }, { "epoch": 1.3272589217919515, "grad_norm": 0.16191455721855164, "learning_rate": 1.3329170807810345e-05, "loss": 0.4733, "step": 5244 }, { "epoch": 1.3275120222728423, "grad_norm": 0.16072039306163788, "learning_rate": 1.3326912969874167e-05, "loss": 0.4878, "step": 5245 }, { "epoch": 1.3277651227537333, "grad_norm": 0.17604763805866241, "learning_rate": 1.332465494121312e-05, "loss": 0.4753, "step": 5246 }, { "epoch": 1.3280182232346243, "grad_norm": 0.1614055186510086, "learning_rate": 1.3322396721956661e-05, "loss": 0.4858, "step": 5247 }, { "epoch": 1.328271323715515, "grad_norm": 0.1671738624572754, "learning_rate": 1.3320138312234246e-05, "loss": 0.4938, "step": 5248 }, { "epoch": 1.328524424196406, "grad_norm": 0.15975067019462585, "learning_rate": 1.3317879712175344e-05, "loss": 0.4917, "step": 5249 }, { "epoch": 1.3287775246772968, "grad_norm": 0.17257250845432281, "learning_rate": 1.3315620921909433e-05, "loss": 0.5047, "step": 5250 }, { "epoch": 1.3290306251581878, "grad_norm": 0.15491220355033875, "learning_rate": 1.331336194156601e-05, "loss": 0.4997, "step": 5251 }, { "epoch": 1.3292837256390788, "grad_norm": 0.17149552702903748, "learning_rate": 1.3311102771274577e-05, "loss": 0.4875, "step": 5252 }, { "epoch": 1.3295368261199696, "grad_norm": 0.16132371127605438, "learning_rate": 1.3308843411164642e-05, "loss": 0.4935, "step": 5253 }, { "epoch": 1.3297899266008606, "grad_norm": 0.15514063835144043, "learning_rate": 1.3306583861365737e-05, "loss": 0.4897, "step": 5254 }, { "epoch": 1.3300430270817514, "grad_norm": 0.16008497774600983, "learning_rate": 1.3304324122007394e-05, "loss": 0.4934, "step": 5255 }, { "epoch": 1.3302961275626424, "grad_norm": 0.1559130698442459, "learning_rate": 1.3302064193219155e-05, "loss": 0.4902, "step": 5256 }, { "epoch": 1.3305492280435334, "grad_norm": 0.15771523118019104, "learning_rate": 1.3299804075130581e-05, "loss": 0.4915, "step": 5257 }, { "epoch": 1.3308023285244241, "grad_norm": 0.16215983033180237, "learning_rate": 1.3297543767871242e-05, "loss": 0.515, "step": 5258 }, { "epoch": 1.3310554290053151, "grad_norm": 0.2580120861530304, "learning_rate": 1.3295283271570707e-05, "loss": 0.5045, "step": 5259 }, { "epoch": 1.331308529486206, "grad_norm": 0.1653471142053604, "learning_rate": 1.3293022586358578e-05, "loss": 0.4881, "step": 5260 }, { "epoch": 1.331561629967097, "grad_norm": 0.15939636528491974, "learning_rate": 1.3290761712364448e-05, "loss": 0.4833, "step": 5261 }, { "epoch": 1.331814730447988, "grad_norm": 0.1600833535194397, "learning_rate": 1.328850064971793e-05, "loss": 0.4896, "step": 5262 }, { "epoch": 1.3320678309288787, "grad_norm": 0.15766093134880066, "learning_rate": 1.3286239398548641e-05, "loss": 0.4849, "step": 5263 }, { "epoch": 1.3323209314097697, "grad_norm": 0.1749449074268341, "learning_rate": 1.3283977958986224e-05, "loss": 0.4995, "step": 5264 }, { "epoch": 1.3325740318906605, "grad_norm": 0.1618470400571823, "learning_rate": 1.3281716331160315e-05, "loss": 0.5001, "step": 5265 }, { "epoch": 1.3328271323715515, "grad_norm": 0.15434621274471283, "learning_rate": 1.3279454515200571e-05, "loss": 0.477, "step": 5266 }, { "epoch": 1.3330802328524425, "grad_norm": 0.1593439280986786, "learning_rate": 1.3277192511236654e-05, "loss": 0.5009, "step": 5267 }, { "epoch": 1.3333333333333333, "grad_norm": 0.15460529923439026, "learning_rate": 1.3274930319398243e-05, "loss": 0.4906, "step": 5268 }, { "epoch": 1.3335864338142243, "grad_norm": 0.1737535297870636, "learning_rate": 1.3272667939815023e-05, "loss": 0.4886, "step": 5269 }, { "epoch": 1.333839534295115, "grad_norm": 0.16623926162719727, "learning_rate": 1.3270405372616693e-05, "loss": 0.4802, "step": 5270 }, { "epoch": 1.334092634776006, "grad_norm": 0.15978845953941345, "learning_rate": 1.326814261793296e-05, "loss": 0.4998, "step": 5271 }, { "epoch": 1.334345735256897, "grad_norm": 0.15721234679222107, "learning_rate": 1.3265879675893542e-05, "loss": 0.4987, "step": 5272 }, { "epoch": 1.334598835737788, "grad_norm": 0.1532667726278305, "learning_rate": 1.3263616546628172e-05, "loss": 0.4613, "step": 5273 }, { "epoch": 1.3348519362186788, "grad_norm": 0.15598708391189575, "learning_rate": 1.3261353230266587e-05, "loss": 0.4946, "step": 5274 }, { "epoch": 1.3351050366995696, "grad_norm": 0.15913717448711395, "learning_rate": 1.325908972693854e-05, "loss": 0.4818, "step": 5275 }, { "epoch": 1.3353581371804606, "grad_norm": 0.15758365392684937, "learning_rate": 1.3256826036773794e-05, "loss": 0.4756, "step": 5276 }, { "epoch": 1.3356112376613516, "grad_norm": 0.15642158687114716, "learning_rate": 1.3254562159902115e-05, "loss": 0.4862, "step": 5277 }, { "epoch": 1.3358643381422426, "grad_norm": 0.1596028059720993, "learning_rate": 1.3252298096453295e-05, "loss": 0.4829, "step": 5278 }, { "epoch": 1.3361174386231334, "grad_norm": 0.15876437723636627, "learning_rate": 1.3250033846557124e-05, "loss": 0.4806, "step": 5279 }, { "epoch": 1.3363705391040244, "grad_norm": 0.19206549227237701, "learning_rate": 1.3247769410343408e-05, "loss": 0.4738, "step": 5280 }, { "epoch": 1.3366236395849151, "grad_norm": 0.15558959543704987, "learning_rate": 1.3245504787941957e-05, "loss": 0.485, "step": 5281 }, { "epoch": 1.3368767400658061, "grad_norm": 0.17636127769947052, "learning_rate": 1.3243239979482602e-05, "loss": 0.477, "step": 5282 }, { "epoch": 1.3371298405466971, "grad_norm": 0.15808559954166412, "learning_rate": 1.3240974985095184e-05, "loss": 0.5005, "step": 5283 }, { "epoch": 1.337382941027588, "grad_norm": 0.15720990300178528, "learning_rate": 1.3238709804909537e-05, "loss": 0.4962, "step": 5284 }, { "epoch": 1.337636041508479, "grad_norm": 0.1609892100095749, "learning_rate": 1.323644443905553e-05, "loss": 0.5047, "step": 5285 }, { "epoch": 1.3378891419893697, "grad_norm": 0.15565304458141327, "learning_rate": 1.3234178887663032e-05, "loss": 0.5027, "step": 5286 }, { "epoch": 1.3381422424702607, "grad_norm": 0.16565965116024017, "learning_rate": 1.3231913150861914e-05, "loss": 0.4865, "step": 5287 }, { "epoch": 1.3383953429511517, "grad_norm": 0.15391512215137482, "learning_rate": 1.3229647228782072e-05, "loss": 0.4778, "step": 5288 }, { "epoch": 1.3386484434320425, "grad_norm": 0.15450388193130493, "learning_rate": 1.3227381121553408e-05, "loss": 0.4599, "step": 5289 }, { "epoch": 1.3389015439129335, "grad_norm": 0.16720673441886902, "learning_rate": 1.3225114829305828e-05, "loss": 0.5244, "step": 5290 }, { "epoch": 1.3391546443938243, "grad_norm": 0.15676811337471008, "learning_rate": 1.3222848352169258e-05, "loss": 0.4789, "step": 5291 }, { "epoch": 1.3394077448747153, "grad_norm": 0.15984578430652618, "learning_rate": 1.3220581690273627e-05, "loss": 0.4952, "step": 5292 }, { "epoch": 1.3396608453556063, "grad_norm": 0.1601322442293167, "learning_rate": 1.3218314843748879e-05, "loss": 0.5137, "step": 5293 }, { "epoch": 1.339913945836497, "grad_norm": 0.15700240433216095, "learning_rate": 1.3216047812724966e-05, "loss": 0.491, "step": 5294 }, { "epoch": 1.340167046317388, "grad_norm": 0.15925902128219604, "learning_rate": 1.3213780597331855e-05, "loss": 0.4827, "step": 5295 }, { "epoch": 1.3404201467982788, "grad_norm": 0.15449264645576477, "learning_rate": 1.3211513197699522e-05, "loss": 0.4991, "step": 5296 }, { "epoch": 1.3406732472791698, "grad_norm": 0.16194504499435425, "learning_rate": 1.3209245613957946e-05, "loss": 0.4982, "step": 5297 }, { "epoch": 1.3409263477600608, "grad_norm": 0.16223284602165222, "learning_rate": 1.320697784623713e-05, "loss": 0.4872, "step": 5298 }, { "epoch": 1.3411794482409516, "grad_norm": 0.1532108038663864, "learning_rate": 1.3204709894667075e-05, "loss": 0.4749, "step": 5299 }, { "epoch": 1.3414325487218426, "grad_norm": 0.16145549714565277, "learning_rate": 1.3202441759377802e-05, "loss": 0.5096, "step": 5300 }, { "epoch": 1.3416856492027334, "grad_norm": 0.15361811220645905, "learning_rate": 1.3200173440499332e-05, "loss": 0.4816, "step": 5301 }, { "epoch": 1.3419387496836244, "grad_norm": 0.15616662800312042, "learning_rate": 1.3197904938161709e-05, "loss": 0.5156, "step": 5302 }, { "epoch": 1.3421918501645154, "grad_norm": 0.1596074402332306, "learning_rate": 1.3195636252494982e-05, "loss": 0.4973, "step": 5303 }, { "epoch": 1.3424449506454061, "grad_norm": 0.15977098047733307, "learning_rate": 1.3193367383629205e-05, "loss": 0.4846, "step": 5304 }, { "epoch": 1.3426980511262971, "grad_norm": 0.16424553096294403, "learning_rate": 1.3191098331694452e-05, "loss": 0.4855, "step": 5305 }, { "epoch": 1.342951151607188, "grad_norm": 0.1574251651763916, "learning_rate": 1.3188829096820798e-05, "loss": 0.5125, "step": 5306 }, { "epoch": 1.343204252088079, "grad_norm": 0.15991215407848358, "learning_rate": 1.3186559679138335e-05, "loss": 0.5219, "step": 5307 }, { "epoch": 1.34345735256897, "grad_norm": 0.1604376584291458, "learning_rate": 1.3184290078777169e-05, "loss": 0.4999, "step": 5308 }, { "epoch": 1.343710453049861, "grad_norm": 0.15986281633377075, "learning_rate": 1.3182020295867406e-05, "loss": 0.4864, "step": 5309 }, { "epoch": 1.3439635535307517, "grad_norm": 0.1614353358745575, "learning_rate": 1.3179750330539167e-05, "loss": 0.4838, "step": 5310 }, { "epoch": 1.3442166540116427, "grad_norm": 0.15671853721141815, "learning_rate": 1.3177480182922588e-05, "loss": 0.4802, "step": 5311 }, { "epoch": 1.3444697544925335, "grad_norm": 0.16507773101329803, "learning_rate": 1.3175209853147812e-05, "loss": 0.4964, "step": 5312 }, { "epoch": 1.3447228549734245, "grad_norm": 0.15250109136104584, "learning_rate": 1.3172939341344988e-05, "loss": 0.5185, "step": 5313 }, { "epoch": 1.3449759554543155, "grad_norm": 0.15548306703567505, "learning_rate": 1.3170668647644285e-05, "loss": 0.4804, "step": 5314 }, { "epoch": 1.3452290559352063, "grad_norm": 0.15957234799861908, "learning_rate": 1.3168397772175871e-05, "loss": 0.4792, "step": 5315 }, { "epoch": 1.3454821564160973, "grad_norm": 0.16042903065681458, "learning_rate": 1.3166126715069936e-05, "loss": 0.4943, "step": 5316 }, { "epoch": 1.345735256896988, "grad_norm": 0.16523852944374084, "learning_rate": 1.3163855476456671e-05, "loss": 0.4785, "step": 5317 }, { "epoch": 1.345988357377879, "grad_norm": 0.15608251094818115, "learning_rate": 1.3161584056466285e-05, "loss": 0.469, "step": 5318 }, { "epoch": 1.34624145785877, "grad_norm": 0.16805891692638397, "learning_rate": 1.315931245522899e-05, "loss": 0.4719, "step": 5319 }, { "epoch": 1.3464945583396608, "grad_norm": 0.1626690775156021, "learning_rate": 1.3157040672875012e-05, "loss": 0.5065, "step": 5320 }, { "epoch": 1.3467476588205518, "grad_norm": 0.17196938395500183, "learning_rate": 1.3154768709534591e-05, "loss": 0.4825, "step": 5321 }, { "epoch": 1.3470007593014426, "grad_norm": 0.16006532311439514, "learning_rate": 1.3152496565337974e-05, "loss": 0.4826, "step": 5322 }, { "epoch": 1.3472538597823336, "grad_norm": 0.15331290662288666, "learning_rate": 1.3150224240415412e-05, "loss": 0.4604, "step": 5323 }, { "epoch": 1.3475069602632246, "grad_norm": 0.15646253526210785, "learning_rate": 1.3147951734897177e-05, "loss": 0.4788, "step": 5324 }, { "epoch": 1.3477600607441154, "grad_norm": 0.16327303647994995, "learning_rate": 1.3145679048913548e-05, "loss": 0.5011, "step": 5325 }, { "epoch": 1.3480131612250064, "grad_norm": 0.15595552325248718, "learning_rate": 1.3143406182594813e-05, "loss": 0.509, "step": 5326 }, { "epoch": 1.3482662617058971, "grad_norm": 0.15660496056079865, "learning_rate": 1.3141133136071271e-05, "loss": 0.4934, "step": 5327 }, { "epoch": 1.3485193621867881, "grad_norm": 0.16031648218631744, "learning_rate": 1.3138859909473224e-05, "loss": 0.5098, "step": 5328 }, { "epoch": 1.3487724626676791, "grad_norm": 0.16398358345031738, "learning_rate": 1.3136586502930999e-05, "loss": 0.4993, "step": 5329 }, { "epoch": 1.34902556314857, "grad_norm": 0.15863247215747833, "learning_rate": 1.3134312916574927e-05, "loss": 0.4806, "step": 5330 }, { "epoch": 1.349278663629461, "grad_norm": 0.16088934242725372, "learning_rate": 1.313203915053534e-05, "loss": 0.5008, "step": 5331 }, { "epoch": 1.3495317641103517, "grad_norm": 0.15728208422660828, "learning_rate": 1.3129765204942593e-05, "loss": 0.4785, "step": 5332 }, { "epoch": 1.3497848645912427, "grad_norm": 0.15445680916309357, "learning_rate": 1.3127491079927047e-05, "loss": 0.4839, "step": 5333 }, { "epoch": 1.3500379650721337, "grad_norm": 0.1632937490940094, "learning_rate": 1.3125216775619069e-05, "loss": 0.5194, "step": 5334 }, { "epoch": 1.3502910655530245, "grad_norm": 0.1640128791332245, "learning_rate": 1.3122942292149044e-05, "loss": 0.5064, "step": 5335 }, { "epoch": 1.3505441660339155, "grad_norm": 0.15445418655872345, "learning_rate": 1.3120667629647363e-05, "loss": 0.4888, "step": 5336 }, { "epoch": 1.3507972665148062, "grad_norm": 0.6754087209701538, "learning_rate": 1.3118392788244425e-05, "loss": 0.4927, "step": 5337 }, { "epoch": 1.3510503669956972, "grad_norm": 0.1615602672100067, "learning_rate": 1.3116117768070645e-05, "loss": 0.5068, "step": 5338 }, { "epoch": 1.3513034674765882, "grad_norm": 0.766118049621582, "learning_rate": 1.3113842569256444e-05, "loss": 0.4759, "step": 5339 }, { "epoch": 1.3515565679574792, "grad_norm": 0.1617424339056015, "learning_rate": 1.3111567191932251e-05, "loss": 0.4944, "step": 5340 }, { "epoch": 1.35180966843837, "grad_norm": 0.15158414840698242, "learning_rate": 1.3109291636228512e-05, "loss": 0.464, "step": 5341 }, { "epoch": 1.352062768919261, "grad_norm": 0.15984636545181274, "learning_rate": 1.3107015902275677e-05, "loss": 0.4802, "step": 5342 }, { "epoch": 1.3523158694001518, "grad_norm": 0.16165472567081451, "learning_rate": 1.3104739990204214e-05, "loss": 0.4962, "step": 5343 }, { "epoch": 1.3525689698810428, "grad_norm": 0.1670755296945572, "learning_rate": 1.3102463900144594e-05, "loss": 0.4951, "step": 5344 }, { "epoch": 1.3528220703619338, "grad_norm": 0.1600918173789978, "learning_rate": 1.3100187632227297e-05, "loss": 0.4983, "step": 5345 }, { "epoch": 1.3530751708428246, "grad_norm": 0.15849240124225616, "learning_rate": 1.309791118658282e-05, "loss": 0.4914, "step": 5346 }, { "epoch": 1.3533282713237156, "grad_norm": 0.16150914132595062, "learning_rate": 1.309563456334167e-05, "loss": 0.4693, "step": 5347 }, { "epoch": 1.3535813718046064, "grad_norm": 0.16467350721359253, "learning_rate": 1.309335776263435e-05, "loss": 0.4794, "step": 5348 }, { "epoch": 1.3538344722854974, "grad_norm": 0.1555204838514328, "learning_rate": 1.3091080784591397e-05, "loss": 0.4793, "step": 5349 }, { "epoch": 1.3540875727663884, "grad_norm": 0.15508686006069183, "learning_rate": 1.308880362934334e-05, "loss": 0.4999, "step": 5350 }, { "epoch": 1.3543406732472791, "grad_norm": 0.1509263515472412, "learning_rate": 1.308652629702072e-05, "loss": 0.4745, "step": 5351 }, { "epoch": 1.3545937737281701, "grad_norm": 0.15466368198394775, "learning_rate": 1.3084248787754094e-05, "loss": 0.4964, "step": 5352 }, { "epoch": 1.354846874209061, "grad_norm": 0.15707796812057495, "learning_rate": 1.3081971101674029e-05, "loss": 0.4783, "step": 5353 }, { "epoch": 1.355099974689952, "grad_norm": 0.2155030220746994, "learning_rate": 1.3079693238911095e-05, "loss": 0.5008, "step": 5354 }, { "epoch": 1.355353075170843, "grad_norm": 0.16332440078258514, "learning_rate": 1.3077415199595886e-05, "loss": 0.4901, "step": 5355 }, { "epoch": 1.3556061756517337, "grad_norm": 0.1557459682226181, "learning_rate": 1.3075136983858992e-05, "loss": 0.4894, "step": 5356 }, { "epoch": 1.3558592761326247, "grad_norm": 0.16095666587352753, "learning_rate": 1.307285859183101e-05, "loss": 0.5065, "step": 5357 }, { "epoch": 1.3561123766135155, "grad_norm": 0.15674464404582977, "learning_rate": 1.3070580023642572e-05, "loss": 0.4927, "step": 5358 }, { "epoch": 1.3563654770944065, "grad_norm": 0.16108457744121552, "learning_rate": 1.306830127942429e-05, "loss": 0.5131, "step": 5359 }, { "epoch": 1.3566185775752975, "grad_norm": 0.17315536737442017, "learning_rate": 1.3066022359306804e-05, "loss": 0.481, "step": 5360 }, { "epoch": 1.3568716780561882, "grad_norm": 0.15744417905807495, "learning_rate": 1.3063743263420761e-05, "loss": 0.483, "step": 5361 }, { "epoch": 1.3571247785370792, "grad_norm": 0.15821465849876404, "learning_rate": 1.3061463991896817e-05, "loss": 0.4692, "step": 5362 }, { "epoch": 1.35737787901797, "grad_norm": 0.15956686437129974, "learning_rate": 1.3059184544865632e-05, "loss": 0.4878, "step": 5363 }, { "epoch": 1.357630979498861, "grad_norm": 0.19305673241615295, "learning_rate": 1.3056904922457892e-05, "loss": 0.4802, "step": 5364 }, { "epoch": 1.357884079979752, "grad_norm": 0.15320143103599548, "learning_rate": 1.3054625124804274e-05, "loss": 0.4926, "step": 5365 }, { "epoch": 1.3581371804606428, "grad_norm": 0.15550391376018524, "learning_rate": 1.3052345152035477e-05, "loss": 0.4839, "step": 5366 }, { "epoch": 1.3583902809415338, "grad_norm": 0.16466914117336273, "learning_rate": 1.3050065004282209e-05, "loss": 0.4936, "step": 5367 }, { "epoch": 1.3586433814224246, "grad_norm": 0.15608473122119904, "learning_rate": 1.3047784681675184e-05, "loss": 0.4932, "step": 5368 }, { "epoch": 1.3588964819033156, "grad_norm": 0.16019484400749207, "learning_rate": 1.3045504184345128e-05, "loss": 0.5074, "step": 5369 }, { "epoch": 1.3591495823842066, "grad_norm": 0.15953586995601654, "learning_rate": 1.3043223512422775e-05, "loss": 0.4924, "step": 5370 }, { "epoch": 1.3594026828650976, "grad_norm": 0.1591566652059555, "learning_rate": 1.3040942666038877e-05, "loss": 0.4946, "step": 5371 }, { "epoch": 1.3596557833459884, "grad_norm": 0.16011033952236176, "learning_rate": 1.3038661645324186e-05, "loss": 0.5096, "step": 5372 }, { "epoch": 1.3599088838268791, "grad_norm": 0.1566564440727234, "learning_rate": 1.3036380450409464e-05, "loss": 0.5014, "step": 5373 }, { "epoch": 1.3601619843077701, "grad_norm": 0.15858131647109985, "learning_rate": 1.3034099081425496e-05, "loss": 0.5214, "step": 5374 }, { "epoch": 1.3604150847886611, "grad_norm": 0.16427603363990784, "learning_rate": 1.3031817538503064e-05, "loss": 0.4816, "step": 5375 }, { "epoch": 1.3606681852695521, "grad_norm": 0.16259001195430756, "learning_rate": 1.3029535821772963e-05, "loss": 0.4648, "step": 5376 }, { "epoch": 1.360921285750443, "grad_norm": 0.15703800320625305, "learning_rate": 1.3027253931366e-05, "loss": 0.4574, "step": 5377 }, { "epoch": 1.361174386231334, "grad_norm": 0.17922963201999664, "learning_rate": 1.302497186741299e-05, "loss": 0.487, "step": 5378 }, { "epoch": 1.3614274867122247, "grad_norm": 0.1569659560918808, "learning_rate": 1.3022689630044758e-05, "loss": 0.506, "step": 5379 }, { "epoch": 1.3616805871931157, "grad_norm": 0.15760083496570587, "learning_rate": 1.3020407219392144e-05, "loss": 0.4753, "step": 5380 }, { "epoch": 1.3619336876740067, "grad_norm": 0.15813110768795013, "learning_rate": 1.301812463558599e-05, "loss": 0.4752, "step": 5381 }, { "epoch": 1.3621867881548975, "grad_norm": 0.1585693359375, "learning_rate": 1.3015841878757154e-05, "loss": 0.4786, "step": 5382 }, { "epoch": 1.3624398886357885, "grad_norm": 0.1607729196548462, "learning_rate": 1.3013558949036497e-05, "loss": 0.4827, "step": 5383 }, { "epoch": 1.3626929891166792, "grad_norm": 0.1572989523410797, "learning_rate": 1.30112758465549e-05, "loss": 0.4887, "step": 5384 }, { "epoch": 1.3629460895975702, "grad_norm": 0.16595907509326935, "learning_rate": 1.3008992571443248e-05, "loss": 0.5066, "step": 5385 }, { "epoch": 1.3631991900784612, "grad_norm": 0.15891943871974945, "learning_rate": 1.3006709123832432e-05, "loss": 0.4742, "step": 5386 }, { "epoch": 1.363452290559352, "grad_norm": 0.1564321219921112, "learning_rate": 1.3004425503853361e-05, "loss": 0.4746, "step": 5387 }, { "epoch": 1.363705391040243, "grad_norm": 0.15756849944591522, "learning_rate": 1.300214171163695e-05, "loss": 0.4819, "step": 5388 }, { "epoch": 1.3639584915211338, "grad_norm": 0.16574639081954956, "learning_rate": 1.2999857747314125e-05, "loss": 0.4899, "step": 5389 }, { "epoch": 1.3642115920020248, "grad_norm": 0.15640227496623993, "learning_rate": 1.2997573611015812e-05, "loss": 0.4899, "step": 5390 }, { "epoch": 1.3644646924829158, "grad_norm": 0.15823857486248016, "learning_rate": 1.299528930287297e-05, "loss": 0.4814, "step": 5391 }, { "epoch": 1.3647177929638066, "grad_norm": 0.15737417340278625, "learning_rate": 1.299300482301654e-05, "loss": 0.4769, "step": 5392 }, { "epoch": 1.3649708934446976, "grad_norm": 0.16039876639842987, "learning_rate": 1.2990720171577495e-05, "loss": 0.5129, "step": 5393 }, { "epoch": 1.3652239939255884, "grad_norm": 0.15707284212112427, "learning_rate": 1.2988435348686808e-05, "loss": 0.4876, "step": 5394 }, { "epoch": 1.3654770944064794, "grad_norm": 0.15629073977470398, "learning_rate": 1.2986150354475458e-05, "loss": 0.4712, "step": 5395 }, { "epoch": 1.3657301948873704, "grad_norm": 0.1577952653169632, "learning_rate": 1.2983865189074444e-05, "loss": 0.4862, "step": 5396 }, { "epoch": 1.3659832953682611, "grad_norm": 0.15963894128799438, "learning_rate": 1.2981579852614771e-05, "loss": 0.4939, "step": 5397 }, { "epoch": 1.3662363958491521, "grad_norm": 0.15310204029083252, "learning_rate": 1.2979294345227444e-05, "loss": 0.4724, "step": 5398 }, { "epoch": 1.366489496330043, "grad_norm": 0.16203507781028748, "learning_rate": 1.2977008667043496e-05, "loss": 0.5044, "step": 5399 }, { "epoch": 1.366742596810934, "grad_norm": 0.16434936225414276, "learning_rate": 1.2974722818193959e-05, "loss": 0.4871, "step": 5400 }, { "epoch": 1.366995697291825, "grad_norm": 0.16586129367351532, "learning_rate": 1.2972436798809867e-05, "loss": 0.493, "step": 5401 }, { "epoch": 1.3672487977727157, "grad_norm": 0.16597889363765717, "learning_rate": 1.2970150609022283e-05, "loss": 0.5364, "step": 5402 }, { "epoch": 1.3675018982536067, "grad_norm": 0.16193127632141113, "learning_rate": 1.2967864248962263e-05, "loss": 0.4708, "step": 5403 }, { "epoch": 1.3677549987344975, "grad_norm": 0.15727004408836365, "learning_rate": 1.296557771876088e-05, "loss": 0.4743, "step": 5404 }, { "epoch": 1.3680080992153885, "grad_norm": 0.15492378175258636, "learning_rate": 1.296329101854922e-05, "loss": 0.4831, "step": 5405 }, { "epoch": 1.3682611996962795, "grad_norm": 0.16238227486610413, "learning_rate": 1.296100414845837e-05, "loss": 0.4726, "step": 5406 }, { "epoch": 1.3685143001771705, "grad_norm": 0.1603262573480606, "learning_rate": 1.2958717108619433e-05, "loss": 0.4802, "step": 5407 }, { "epoch": 1.3687674006580612, "grad_norm": 0.1580606997013092, "learning_rate": 1.295642989916352e-05, "loss": 0.4854, "step": 5408 }, { "epoch": 1.3690205011389522, "grad_norm": 0.16848139464855194, "learning_rate": 1.2954142520221753e-05, "loss": 0.4639, "step": 5409 }, { "epoch": 1.369273601619843, "grad_norm": 0.17022953927516937, "learning_rate": 1.2951854971925261e-05, "loss": 0.4827, "step": 5410 }, { "epoch": 1.369526702100734, "grad_norm": 0.1687416434288025, "learning_rate": 1.2949567254405187e-05, "loss": 0.5263, "step": 5411 }, { "epoch": 1.369779802581625, "grad_norm": 0.1651364266872406, "learning_rate": 1.2947279367792676e-05, "loss": 0.4911, "step": 5412 }, { "epoch": 1.3700329030625158, "grad_norm": 0.16224932670593262, "learning_rate": 1.2944991312218895e-05, "loss": 0.4841, "step": 5413 }, { "epoch": 1.3702860035434068, "grad_norm": 0.16012606024742126, "learning_rate": 1.2942703087815005e-05, "loss": 0.4766, "step": 5414 }, { "epoch": 1.3705391040242976, "grad_norm": 0.1591786891222, "learning_rate": 1.294041469471219e-05, "loss": 0.4802, "step": 5415 }, { "epoch": 1.3707922045051886, "grad_norm": 0.16311007738113403, "learning_rate": 1.2938126133041639e-05, "loss": 0.492, "step": 5416 }, { "epoch": 1.3710453049860796, "grad_norm": 0.1684475690126419, "learning_rate": 1.2935837402934547e-05, "loss": 0.4644, "step": 5417 }, { "epoch": 1.3712984054669703, "grad_norm": 0.1652650237083435, "learning_rate": 1.2933548504522126e-05, "loss": 0.4748, "step": 5418 }, { "epoch": 1.3715515059478613, "grad_norm": 0.15432867407798767, "learning_rate": 1.2931259437935597e-05, "loss": 0.4836, "step": 5419 }, { "epoch": 1.3718046064287521, "grad_norm": 0.15929223597049713, "learning_rate": 1.2928970203306175e-05, "loss": 0.493, "step": 5420 }, { "epoch": 1.3720577069096431, "grad_norm": 0.15707942843437195, "learning_rate": 1.2926680800765105e-05, "loss": 0.4763, "step": 5421 }, { "epoch": 1.3723108073905341, "grad_norm": 0.16068603098392487, "learning_rate": 1.2924391230443638e-05, "loss": 0.5212, "step": 5422 }, { "epoch": 1.372563907871425, "grad_norm": 0.15567424893379211, "learning_rate": 1.292210149247302e-05, "loss": 0.4835, "step": 5423 }, { "epoch": 1.372817008352316, "grad_norm": 0.15966524183750153, "learning_rate": 1.2919811586984525e-05, "loss": 0.4986, "step": 5424 }, { "epoch": 1.3730701088332067, "grad_norm": 0.1588864028453827, "learning_rate": 1.2917521514109424e-05, "loss": 0.5108, "step": 5425 }, { "epoch": 1.3733232093140977, "grad_norm": 0.1651369035243988, "learning_rate": 1.2915231273979003e-05, "loss": 0.489, "step": 5426 }, { "epoch": 1.3735763097949887, "grad_norm": 0.16061896085739136, "learning_rate": 1.2912940866724557e-05, "loss": 0.4973, "step": 5427 }, { "epoch": 1.3738294102758795, "grad_norm": 0.1623592972755432, "learning_rate": 1.2910650292477392e-05, "loss": 0.5107, "step": 5428 }, { "epoch": 1.3740825107567705, "grad_norm": 0.16325977444648743, "learning_rate": 1.2908359551368819e-05, "loss": 0.5142, "step": 5429 }, { "epoch": 1.3743356112376612, "grad_norm": 0.1648002415895462, "learning_rate": 1.290606864353016e-05, "loss": 0.4997, "step": 5430 }, { "epoch": 1.3745887117185522, "grad_norm": 0.15420913696289062, "learning_rate": 1.2903777569092752e-05, "loss": 0.4862, "step": 5431 }, { "epoch": 1.3748418121994432, "grad_norm": 0.1588687151670456, "learning_rate": 1.2901486328187935e-05, "loss": 0.4834, "step": 5432 }, { "epoch": 1.375094912680334, "grad_norm": 0.1604485958814621, "learning_rate": 1.2899194920947062e-05, "loss": 0.5139, "step": 5433 }, { "epoch": 1.375348013161225, "grad_norm": 0.1566561907529831, "learning_rate": 1.2896903347501496e-05, "loss": 0.4797, "step": 5434 }, { "epoch": 1.3756011136421158, "grad_norm": 0.15664264559745789, "learning_rate": 1.2894611607982603e-05, "loss": 0.4912, "step": 5435 }, { "epoch": 1.3758542141230068, "grad_norm": 0.16019538044929504, "learning_rate": 1.289231970252177e-05, "loss": 0.5055, "step": 5436 }, { "epoch": 1.3761073146038978, "grad_norm": 0.19085852801799774, "learning_rate": 1.289002763125038e-05, "loss": 0.4683, "step": 5437 }, { "epoch": 1.3763604150847888, "grad_norm": 0.697837769985199, "learning_rate": 1.288773539429984e-05, "loss": 0.4869, "step": 5438 }, { "epoch": 1.3766135155656796, "grad_norm": 0.15981483459472656, "learning_rate": 1.288544299180155e-05, "loss": 0.484, "step": 5439 }, { "epoch": 1.3768666160465706, "grad_norm": 0.16346552968025208, "learning_rate": 1.288315042388694e-05, "loss": 0.5192, "step": 5440 }, { "epoch": 1.3771197165274613, "grad_norm": 0.15998664498329163, "learning_rate": 1.2880857690687431e-05, "loss": 0.479, "step": 5441 }, { "epoch": 1.3773728170083523, "grad_norm": 0.1575089395046234, "learning_rate": 1.2878564792334459e-05, "loss": 0.4913, "step": 5442 }, { "epoch": 1.3776259174892433, "grad_norm": 0.1588413268327713, "learning_rate": 1.2876271728959473e-05, "loss": 0.4862, "step": 5443 }, { "epoch": 1.3778790179701341, "grad_norm": 0.1593618243932724, "learning_rate": 1.2873978500693935e-05, "loss": 0.5046, "step": 5444 }, { "epoch": 1.3781321184510251, "grad_norm": 0.1633927971124649, "learning_rate": 1.2871685107669302e-05, "loss": 0.4994, "step": 5445 }, { "epoch": 1.378385218931916, "grad_norm": 0.15766243636608124, "learning_rate": 1.2869391550017052e-05, "loss": 0.4919, "step": 5446 }, { "epoch": 1.378638319412807, "grad_norm": 0.15845458209514618, "learning_rate": 1.2867097827868675e-05, "loss": 0.4859, "step": 5447 }, { "epoch": 1.378891419893698, "grad_norm": 0.16205254197120667, "learning_rate": 1.2864803941355659e-05, "loss": 0.5068, "step": 5448 }, { "epoch": 1.3791445203745887, "grad_norm": 0.16030874848365784, "learning_rate": 1.2862509890609512e-05, "loss": 0.466, "step": 5449 }, { "epoch": 1.3793976208554797, "grad_norm": 0.1646445244550705, "learning_rate": 1.2860215675761742e-05, "loss": 0.4997, "step": 5450 }, { "epoch": 1.3796507213363705, "grad_norm": 0.15965671837329865, "learning_rate": 1.2857921296943876e-05, "loss": 0.5056, "step": 5451 }, { "epoch": 1.3799038218172615, "grad_norm": 0.15403400361537933, "learning_rate": 1.2855626754287445e-05, "loss": 0.4815, "step": 5452 }, { "epoch": 1.3801569222981525, "grad_norm": 0.15577055513858795, "learning_rate": 1.285333204792399e-05, "loss": 0.4732, "step": 5453 }, { "epoch": 1.3804100227790432, "grad_norm": 0.15903879702091217, "learning_rate": 1.285103717798506e-05, "loss": 0.4911, "step": 5454 }, { "epoch": 1.3806631232599342, "grad_norm": 0.16013123095035553, "learning_rate": 1.2848742144602215e-05, "loss": 0.4991, "step": 5455 }, { "epoch": 1.380916223740825, "grad_norm": 0.1561097949743271, "learning_rate": 1.284644694790703e-05, "loss": 0.5036, "step": 5456 }, { "epoch": 1.381169324221716, "grad_norm": 0.16662146151065826, "learning_rate": 1.2844151588031078e-05, "loss": 0.5005, "step": 5457 }, { "epoch": 1.381422424702607, "grad_norm": 0.15924973785877228, "learning_rate": 1.2841856065105947e-05, "loss": 0.5061, "step": 5458 }, { "epoch": 1.3816755251834978, "grad_norm": 0.1580783575773239, "learning_rate": 1.2839560379263234e-05, "loss": 0.4988, "step": 5459 }, { "epoch": 1.3819286256643888, "grad_norm": 0.1576288938522339, "learning_rate": 1.2837264530634554e-05, "loss": 0.5024, "step": 5460 }, { "epoch": 1.3821817261452796, "grad_norm": 0.1629394292831421, "learning_rate": 1.2834968519351512e-05, "loss": 0.5102, "step": 5461 }, { "epoch": 1.3824348266261706, "grad_norm": 0.17420004308223724, "learning_rate": 1.283267234554574e-05, "loss": 0.4869, "step": 5462 }, { "epoch": 1.3826879271070616, "grad_norm": 0.15767453610897064, "learning_rate": 1.2830376009348873e-05, "loss": 0.4754, "step": 5463 }, { "epoch": 1.3829410275879523, "grad_norm": 0.1605314165353775, "learning_rate": 1.2828079510892548e-05, "loss": 0.4789, "step": 5464 }, { "epoch": 1.3831941280688433, "grad_norm": 0.15906894207000732, "learning_rate": 1.2825782850308428e-05, "loss": 0.4946, "step": 5465 }, { "epoch": 1.3834472285497341, "grad_norm": 0.15978744626045227, "learning_rate": 1.2823486027728171e-05, "loss": 0.486, "step": 5466 }, { "epoch": 1.3837003290306251, "grad_norm": 0.17347361147403717, "learning_rate": 1.2821189043283451e-05, "loss": 0.4843, "step": 5467 }, { "epoch": 1.3839534295115161, "grad_norm": 0.15888258814811707, "learning_rate": 1.2818891897105943e-05, "loss": 0.4762, "step": 5468 }, { "epoch": 1.3842065299924071, "grad_norm": 0.16137917339801788, "learning_rate": 1.2816594589327344e-05, "loss": 0.4989, "step": 5469 }, { "epoch": 1.384459630473298, "grad_norm": 0.1614619791507721, "learning_rate": 1.2814297120079355e-05, "loss": 0.4977, "step": 5470 }, { "epoch": 1.3847127309541887, "grad_norm": 0.15797261893749237, "learning_rate": 1.2811999489493677e-05, "loss": 0.4911, "step": 5471 }, { "epoch": 1.3849658314350797, "grad_norm": 0.1573764979839325, "learning_rate": 1.2809701697702038e-05, "loss": 0.5051, "step": 5472 }, { "epoch": 1.3852189319159707, "grad_norm": 0.16644535958766937, "learning_rate": 1.2807403744836157e-05, "loss": 0.5043, "step": 5473 }, { "epoch": 1.3854720323968617, "grad_norm": 0.1616896390914917, "learning_rate": 1.2805105631027774e-05, "loss": 0.497, "step": 5474 }, { "epoch": 1.3857251328777525, "grad_norm": 0.16640593111515045, "learning_rate": 1.280280735640864e-05, "loss": 0.4902, "step": 5475 }, { "epoch": 1.3859782333586435, "grad_norm": 0.16214632987976074, "learning_rate": 1.2800508921110502e-05, "loss": 0.4792, "step": 5476 }, { "epoch": 1.3862313338395342, "grad_norm": 0.15869495272636414, "learning_rate": 1.2798210325265129e-05, "loss": 0.4785, "step": 5477 }, { "epoch": 1.3864844343204252, "grad_norm": 0.1603357195854187, "learning_rate": 1.2795911569004293e-05, "loss": 0.4788, "step": 5478 }, { "epoch": 1.3867375348013162, "grad_norm": 0.15970326960086823, "learning_rate": 1.279361265245978e-05, "loss": 0.5117, "step": 5479 }, { "epoch": 1.386990635282207, "grad_norm": 0.15824967622756958, "learning_rate": 1.2791313575763376e-05, "loss": 0.476, "step": 5480 }, { "epoch": 1.387243735763098, "grad_norm": 0.15873469412326813, "learning_rate": 1.2789014339046888e-05, "loss": 0.4783, "step": 5481 }, { "epoch": 1.3874968362439888, "grad_norm": 0.16103583574295044, "learning_rate": 1.2786714942442124e-05, "loss": 0.5083, "step": 5482 }, { "epoch": 1.3877499367248798, "grad_norm": 0.16006579995155334, "learning_rate": 1.2784415386080904e-05, "loss": 0.5089, "step": 5483 }, { "epoch": 1.3880030372057708, "grad_norm": 0.16804282367229462, "learning_rate": 1.2782115670095054e-05, "loss": 0.4845, "step": 5484 }, { "epoch": 1.3882561376866616, "grad_norm": 0.16122007369995117, "learning_rate": 1.277981579461642e-05, "loss": 0.4814, "step": 5485 }, { "epoch": 1.3885092381675526, "grad_norm": 0.16337081789970398, "learning_rate": 1.2777515759776834e-05, "loss": 0.5026, "step": 5486 }, { "epoch": 1.3887623386484433, "grad_norm": 0.15989552438259125, "learning_rate": 1.2775215565708168e-05, "loss": 0.4806, "step": 5487 }, { "epoch": 1.3890154391293343, "grad_norm": 0.16486889123916626, "learning_rate": 1.2772915212542281e-05, "loss": 0.4828, "step": 5488 }, { "epoch": 1.3892685396102253, "grad_norm": 0.15917259454727173, "learning_rate": 1.2770614700411045e-05, "loss": 0.4902, "step": 5489 }, { "epoch": 1.3895216400911161, "grad_norm": 0.26186463236808777, "learning_rate": 1.2768314029446342e-05, "loss": 0.4965, "step": 5490 }, { "epoch": 1.3897747405720071, "grad_norm": 0.16015346348285675, "learning_rate": 1.2766013199780075e-05, "loss": 0.4792, "step": 5491 }, { "epoch": 1.390027841052898, "grad_norm": 0.20893555879592896, "learning_rate": 1.2763712211544133e-05, "loss": 0.4671, "step": 5492 }, { "epoch": 1.390280941533789, "grad_norm": 0.15890660881996155, "learning_rate": 1.2761411064870433e-05, "loss": 0.4705, "step": 5493 }, { "epoch": 1.39053404201468, "grad_norm": 0.15544959902763367, "learning_rate": 1.2759109759890898e-05, "loss": 0.4838, "step": 5494 }, { "epoch": 1.3907871424955707, "grad_norm": 0.1563069373369217, "learning_rate": 1.2756808296737449e-05, "loss": 0.4877, "step": 5495 }, { "epoch": 1.3910402429764617, "grad_norm": 0.1588369905948639, "learning_rate": 1.275450667554203e-05, "loss": 0.5027, "step": 5496 }, { "epoch": 1.3912933434573524, "grad_norm": 0.15630540251731873, "learning_rate": 1.2752204896436587e-05, "loss": 0.4711, "step": 5497 }, { "epoch": 1.3915464439382434, "grad_norm": 0.16509056091308594, "learning_rate": 1.2749902959553073e-05, "loss": 0.5078, "step": 5498 }, { "epoch": 1.3917995444191344, "grad_norm": 0.16176167130470276, "learning_rate": 1.2747600865023458e-05, "loss": 0.4789, "step": 5499 }, { "epoch": 1.3920526449000254, "grad_norm": 0.16351306438446045, "learning_rate": 1.2745298612979713e-05, "loss": 0.4911, "step": 5500 }, { "epoch": 1.3923057453809162, "grad_norm": 0.1722777634859085, "learning_rate": 1.2742996203553822e-05, "loss": 0.5115, "step": 5501 }, { "epoch": 1.392558845861807, "grad_norm": 0.16285370290279388, "learning_rate": 1.2740693636877777e-05, "loss": 0.4885, "step": 5502 }, { "epoch": 1.392811946342698, "grad_norm": 0.1577533483505249, "learning_rate": 1.2738390913083578e-05, "loss": 0.4935, "step": 5503 }, { "epoch": 1.393065046823589, "grad_norm": 0.16875077784061432, "learning_rate": 1.273608803230324e-05, "loss": 0.4943, "step": 5504 }, { "epoch": 1.39331814730448, "grad_norm": 0.15627582371234894, "learning_rate": 1.2733784994668777e-05, "loss": 0.4647, "step": 5505 }, { "epoch": 1.3935712477853708, "grad_norm": 0.15486443042755127, "learning_rate": 1.273148180031222e-05, "loss": 0.4818, "step": 5506 }, { "epoch": 1.3938243482662618, "grad_norm": 0.16161465644836426, "learning_rate": 1.2729178449365605e-05, "loss": 0.5088, "step": 5507 }, { "epoch": 1.3940774487471526, "grad_norm": 0.16670376062393188, "learning_rate": 1.2726874941960979e-05, "loss": 0.4937, "step": 5508 }, { "epoch": 1.3943305492280436, "grad_norm": 0.15827488899230957, "learning_rate": 1.2724571278230395e-05, "loss": 0.466, "step": 5509 }, { "epoch": 1.3945836497089346, "grad_norm": 0.2023501992225647, "learning_rate": 1.2722267458305924e-05, "loss": 0.4893, "step": 5510 }, { "epoch": 1.3948367501898253, "grad_norm": 0.16095532476902008, "learning_rate": 1.2719963482319631e-05, "loss": 0.4988, "step": 5511 }, { "epoch": 1.3950898506707163, "grad_norm": 0.15618866682052612, "learning_rate": 1.2717659350403598e-05, "loss": 0.4799, "step": 5512 }, { "epoch": 1.3953429511516071, "grad_norm": 0.16001594066619873, "learning_rate": 1.2715355062689925e-05, "loss": 0.5079, "step": 5513 }, { "epoch": 1.3955960516324981, "grad_norm": 0.17589369416236877, "learning_rate": 1.2713050619310703e-05, "loss": 0.4714, "step": 5514 }, { "epoch": 1.3958491521133891, "grad_norm": 0.15957142412662506, "learning_rate": 1.2710746020398043e-05, "loss": 0.4994, "step": 5515 }, { "epoch": 1.39610225259428, "grad_norm": 0.1558864563703537, "learning_rate": 1.2708441266084068e-05, "loss": 0.4902, "step": 5516 }, { "epoch": 1.396355353075171, "grad_norm": 0.1570233553647995, "learning_rate": 1.2706136356500898e-05, "loss": 0.4605, "step": 5517 }, { "epoch": 1.3966084535560617, "grad_norm": 0.19464223086833954, "learning_rate": 1.2703831291780668e-05, "loss": 0.4957, "step": 5518 }, { "epoch": 1.3968615540369527, "grad_norm": 0.15670989453792572, "learning_rate": 1.270152607205553e-05, "loss": 0.4968, "step": 5519 }, { "epoch": 1.3971146545178437, "grad_norm": 0.16523510217666626, "learning_rate": 1.2699220697457632e-05, "loss": 0.5027, "step": 5520 }, { "epoch": 1.3973677549987344, "grad_norm": 0.1600266695022583, "learning_rate": 1.2696915168119136e-05, "loss": 0.5004, "step": 5521 }, { "epoch": 1.3976208554796254, "grad_norm": 0.1623755693435669, "learning_rate": 1.2694609484172215e-05, "loss": 0.4804, "step": 5522 }, { "epoch": 1.3978739559605162, "grad_norm": 0.160418301820755, "learning_rate": 1.2692303645749049e-05, "loss": 0.491, "step": 5523 }, { "epoch": 1.3981270564414072, "grad_norm": 0.16007478535175323, "learning_rate": 1.2689997652981825e-05, "loss": 0.4933, "step": 5524 }, { "epoch": 1.3983801569222982, "grad_norm": 0.15257468819618225, "learning_rate": 1.2687691506002742e-05, "loss": 0.4922, "step": 5525 }, { "epoch": 1.398633257403189, "grad_norm": 0.1582288146018982, "learning_rate": 1.2685385204944005e-05, "loss": 0.4893, "step": 5526 }, { "epoch": 1.39888635788408, "grad_norm": 0.16200709342956543, "learning_rate": 1.2683078749937832e-05, "loss": 0.5002, "step": 5527 }, { "epoch": 1.3991394583649708, "grad_norm": 0.15223857760429382, "learning_rate": 1.2680772141116444e-05, "loss": 0.4802, "step": 5528 }, { "epoch": 1.3993925588458618, "grad_norm": 0.16868436336517334, "learning_rate": 1.2678465378612077e-05, "loss": 0.4793, "step": 5529 }, { "epoch": 1.3996456593267528, "grad_norm": 0.1545170694589615, "learning_rate": 1.2676158462556973e-05, "loss": 0.4799, "step": 5530 }, { "epoch": 1.3998987598076436, "grad_norm": 0.15970417857170105, "learning_rate": 1.2673851393083378e-05, "loss": 0.4921, "step": 5531 }, { "epoch": 1.4001518602885346, "grad_norm": 0.1650785654783249, "learning_rate": 1.2671544170323558e-05, "loss": 0.5127, "step": 5532 }, { "epoch": 1.4004049607694253, "grad_norm": 0.15834848582744598, "learning_rate": 1.2669236794409776e-05, "loss": 0.4948, "step": 5533 }, { "epoch": 1.4006580612503163, "grad_norm": 0.15938159823417664, "learning_rate": 1.2666929265474308e-05, "loss": 0.4598, "step": 5534 }, { "epoch": 1.4009111617312073, "grad_norm": 0.15658152103424072, "learning_rate": 1.2664621583649448e-05, "loss": 0.518, "step": 5535 }, { "epoch": 1.4011642622120983, "grad_norm": 0.15653349459171295, "learning_rate": 1.2662313749067483e-05, "loss": 0.5152, "step": 5536 }, { "epoch": 1.401417362692989, "grad_norm": 0.15777161717414856, "learning_rate": 1.2660005761860714e-05, "loss": 0.4993, "step": 5537 }, { "epoch": 1.40167046317388, "grad_norm": 0.22846300899982452, "learning_rate": 1.2657697622161462e-05, "loss": 0.4867, "step": 5538 }, { "epoch": 1.4019235636547709, "grad_norm": 0.15530575811862946, "learning_rate": 1.2655389330102042e-05, "loss": 0.5239, "step": 5539 }, { "epoch": 1.4021766641356619, "grad_norm": 0.16174142062664032, "learning_rate": 1.265308088581478e-05, "loss": 0.4967, "step": 5540 }, { "epoch": 1.4024297646165529, "grad_norm": 0.16280382871627808, "learning_rate": 1.2650772289432025e-05, "loss": 0.5003, "step": 5541 }, { "epoch": 1.4026828650974437, "grad_norm": 0.16163934767246246, "learning_rate": 1.2648463541086113e-05, "loss": 0.5196, "step": 5542 }, { "epoch": 1.4029359655783347, "grad_norm": 0.16418857872486115, "learning_rate": 1.2646154640909408e-05, "loss": 0.4592, "step": 5543 }, { "epoch": 1.4031890660592254, "grad_norm": 0.16091331839561462, "learning_rate": 1.264384558903427e-05, "loss": 0.4842, "step": 5544 }, { "epoch": 1.4034421665401164, "grad_norm": 0.16198372840881348, "learning_rate": 1.264153638559307e-05, "loss": 0.4835, "step": 5545 }, { "epoch": 1.4036952670210074, "grad_norm": 0.15470705926418304, "learning_rate": 1.2639227030718193e-05, "loss": 0.4684, "step": 5546 }, { "epoch": 1.4039483675018982, "grad_norm": 0.15770424902439117, "learning_rate": 1.263691752454203e-05, "loss": 0.4917, "step": 5547 }, { "epoch": 1.4042014679827892, "grad_norm": 0.15552891790866852, "learning_rate": 1.2634607867196977e-05, "loss": 0.4696, "step": 5548 }, { "epoch": 1.40445456846368, "grad_norm": 0.1596030294895172, "learning_rate": 1.2632298058815446e-05, "loss": 0.4979, "step": 5549 }, { "epoch": 1.404707668944571, "grad_norm": 0.15909749269485474, "learning_rate": 1.2629988099529849e-05, "loss": 0.4935, "step": 5550 }, { "epoch": 1.404960769425462, "grad_norm": 0.16339799761772156, "learning_rate": 1.2627677989472615e-05, "loss": 0.5083, "step": 5551 }, { "epoch": 1.4052138699063528, "grad_norm": 0.1719578355550766, "learning_rate": 1.2625367728776176e-05, "loss": 0.4979, "step": 5552 }, { "epoch": 1.4054669703872438, "grad_norm": 0.16437843441963196, "learning_rate": 1.262305731757297e-05, "loss": 0.5108, "step": 5553 }, { "epoch": 1.4057200708681346, "grad_norm": 0.1608104556798935, "learning_rate": 1.2620746755995454e-05, "loss": 0.5104, "step": 5554 }, { "epoch": 1.4059731713490256, "grad_norm": 0.15659570693969727, "learning_rate": 1.261843604417609e-05, "loss": 0.4694, "step": 5555 }, { "epoch": 1.4062262718299166, "grad_norm": 0.16248109936714172, "learning_rate": 1.2616125182247339e-05, "loss": 0.4778, "step": 5556 }, { "epoch": 1.4064793723108073, "grad_norm": 0.15831118822097778, "learning_rate": 1.2613814170341681e-05, "loss": 0.485, "step": 5557 }, { "epoch": 1.4067324727916983, "grad_norm": 0.15937742590904236, "learning_rate": 1.2611503008591602e-05, "loss": 0.5013, "step": 5558 }, { "epoch": 1.406985573272589, "grad_norm": 0.16841067373752594, "learning_rate": 1.260919169712959e-05, "loss": 0.4944, "step": 5559 }, { "epoch": 1.40723867375348, "grad_norm": 0.1641966849565506, "learning_rate": 1.2606880236088159e-05, "loss": 0.5073, "step": 5560 }, { "epoch": 1.407491774234371, "grad_norm": 0.16108587384223938, "learning_rate": 1.260456862559981e-05, "loss": 0.4893, "step": 5561 }, { "epoch": 1.4077448747152619, "grad_norm": 0.15416717529296875, "learning_rate": 1.2602256865797067e-05, "loss": 0.4649, "step": 5562 }, { "epoch": 1.4079979751961529, "grad_norm": 0.16200564801692963, "learning_rate": 1.2599944956812459e-05, "loss": 0.4893, "step": 5563 }, { "epoch": 1.4082510756770437, "grad_norm": 0.15786898136138916, "learning_rate": 1.2597632898778522e-05, "loss": 0.4907, "step": 5564 }, { "epoch": 1.4085041761579347, "grad_norm": 0.15677544474601746, "learning_rate": 1.2595320691827801e-05, "loss": 0.4923, "step": 5565 }, { "epoch": 1.4087572766388257, "grad_norm": 0.15802599489688873, "learning_rate": 1.2593008336092851e-05, "loss": 0.4719, "step": 5566 }, { "epoch": 1.4090103771197167, "grad_norm": 0.2040954977273941, "learning_rate": 1.2590695831706233e-05, "loss": 0.4782, "step": 5567 }, { "epoch": 1.4092634776006074, "grad_norm": 0.15894001722335815, "learning_rate": 1.2588383178800517e-05, "loss": 0.4719, "step": 5568 }, { "epoch": 1.4095165780814982, "grad_norm": 0.15686224400997162, "learning_rate": 1.2586070377508284e-05, "loss": 0.4914, "step": 5569 }, { "epoch": 1.4097696785623892, "grad_norm": 0.15721604228019714, "learning_rate": 1.2583757427962121e-05, "loss": 0.4837, "step": 5570 }, { "epoch": 1.4100227790432802, "grad_norm": 0.16316750645637512, "learning_rate": 1.2581444330294628e-05, "loss": 0.4899, "step": 5571 }, { "epoch": 1.4102758795241712, "grad_norm": 0.16207927465438843, "learning_rate": 1.2579131084638408e-05, "loss": 0.5065, "step": 5572 }, { "epoch": 1.410528980005062, "grad_norm": 0.1594908982515335, "learning_rate": 1.257681769112607e-05, "loss": 0.5073, "step": 5573 }, { "epoch": 1.410782080485953, "grad_norm": 0.15406562387943268, "learning_rate": 1.2574504149890246e-05, "loss": 0.4707, "step": 5574 }, { "epoch": 1.4110351809668438, "grad_norm": 0.16134901344776154, "learning_rate": 1.2572190461063554e-05, "loss": 0.472, "step": 5575 }, { "epoch": 1.4112882814477348, "grad_norm": 0.15741609036922455, "learning_rate": 1.2569876624778643e-05, "loss": 0.4882, "step": 5576 }, { "epoch": 1.4115413819286258, "grad_norm": 0.16273514926433563, "learning_rate": 1.2567562641168156e-05, "loss": 0.4706, "step": 5577 }, { "epoch": 1.4117944824095165, "grad_norm": 0.15622203052043915, "learning_rate": 1.2565248510364745e-05, "loss": 0.4855, "step": 5578 }, { "epoch": 1.4120475828904075, "grad_norm": 0.15626060962677002, "learning_rate": 1.2562934232501084e-05, "loss": 0.484, "step": 5579 }, { "epoch": 1.4123006833712983, "grad_norm": 0.15780194103717804, "learning_rate": 1.2560619807709839e-05, "loss": 0.4752, "step": 5580 }, { "epoch": 1.4125537838521893, "grad_norm": 0.1581398993730545, "learning_rate": 1.2558305236123687e-05, "loss": 0.5181, "step": 5581 }, { "epoch": 1.4128068843330803, "grad_norm": 0.1583554744720459, "learning_rate": 1.255599051787533e-05, "loss": 0.4957, "step": 5582 }, { "epoch": 1.413059984813971, "grad_norm": 0.15890783071517944, "learning_rate": 1.2553675653097454e-05, "loss": 0.5021, "step": 5583 }, { "epoch": 1.413313085294862, "grad_norm": 0.1531054824590683, "learning_rate": 1.2551360641922773e-05, "loss": 0.4758, "step": 5584 }, { "epoch": 1.4135661857757529, "grad_norm": 0.1554764360189438, "learning_rate": 1.2549045484483995e-05, "loss": 0.4627, "step": 5585 }, { "epoch": 1.4138192862566439, "grad_norm": 0.15925107896327972, "learning_rate": 1.2546730180913847e-05, "loss": 0.4884, "step": 5586 }, { "epoch": 1.4140723867375349, "grad_norm": 0.16074351966381073, "learning_rate": 1.254441473134506e-05, "loss": 0.5066, "step": 5587 }, { "epoch": 1.4143254872184257, "grad_norm": 0.15665696561336517, "learning_rate": 1.2542099135910375e-05, "loss": 0.483, "step": 5588 }, { "epoch": 1.4145785876993167, "grad_norm": 0.1642092913389206, "learning_rate": 1.2539783394742537e-05, "loss": 0.492, "step": 5589 }, { "epoch": 1.4148316881802074, "grad_norm": 0.1632407158613205, "learning_rate": 1.2537467507974306e-05, "loss": 0.4829, "step": 5590 }, { "epoch": 1.4150847886610984, "grad_norm": 0.19726379215717316, "learning_rate": 1.2535151475738446e-05, "loss": 0.4863, "step": 5591 }, { "epoch": 1.4153378891419894, "grad_norm": 0.1597345471382141, "learning_rate": 1.2532835298167729e-05, "loss": 0.5139, "step": 5592 }, { "epoch": 1.4155909896228802, "grad_norm": 0.15737684071063995, "learning_rate": 1.2530518975394936e-05, "loss": 0.4853, "step": 5593 }, { "epoch": 1.4158440901037712, "grad_norm": 0.15906618535518646, "learning_rate": 1.2528202507552857e-05, "loss": 0.4856, "step": 5594 }, { "epoch": 1.416097190584662, "grad_norm": 0.16222305595874786, "learning_rate": 1.2525885894774294e-05, "loss": 0.4767, "step": 5595 }, { "epoch": 1.416350291065553, "grad_norm": 0.15979614853858948, "learning_rate": 1.2523569137192051e-05, "loss": 0.4966, "step": 5596 }, { "epoch": 1.416603391546444, "grad_norm": 0.15857428312301636, "learning_rate": 1.252125223493894e-05, "loss": 0.4927, "step": 5597 }, { "epoch": 1.416856492027335, "grad_norm": 0.1657688021659851, "learning_rate": 1.2518935188147787e-05, "loss": 0.4638, "step": 5598 }, { "epoch": 1.4171095925082258, "grad_norm": 0.15677371621131897, "learning_rate": 1.2516617996951426e-05, "loss": 0.4989, "step": 5599 }, { "epoch": 1.4173626929891165, "grad_norm": 0.16580429673194885, "learning_rate": 1.251430066148269e-05, "loss": 0.4934, "step": 5600 }, { "epoch": 1.4176157934700075, "grad_norm": 0.1628570407629013, "learning_rate": 1.2511983181874432e-05, "loss": 0.5133, "step": 5601 }, { "epoch": 1.4178688939508985, "grad_norm": 0.16622862219810486, "learning_rate": 1.2509665558259508e-05, "loss": 0.4947, "step": 5602 }, { "epoch": 1.4181219944317895, "grad_norm": 0.17066824436187744, "learning_rate": 1.2507347790770776e-05, "loss": 0.4961, "step": 5603 }, { "epoch": 1.4183750949126803, "grad_norm": 0.158124640583992, "learning_rate": 1.250502987954112e-05, "loss": 0.4959, "step": 5604 }, { "epoch": 1.4186281953935713, "grad_norm": 0.15959544479846954, "learning_rate": 1.2502711824703414e-05, "loss": 0.4628, "step": 5605 }, { "epoch": 1.418881295874462, "grad_norm": 0.16014553606510162, "learning_rate": 1.2500393626390545e-05, "loss": 0.4735, "step": 5606 }, { "epoch": 1.419134396355353, "grad_norm": 0.1594773381948471, "learning_rate": 1.2498075284735416e-05, "loss": 0.4689, "step": 5607 }, { "epoch": 1.419387496836244, "grad_norm": 0.16202712059020996, "learning_rate": 1.2495756799870926e-05, "loss": 0.5055, "step": 5608 }, { "epoch": 1.4196405973171349, "grad_norm": 0.16116967797279358, "learning_rate": 1.2493438171929997e-05, "loss": 0.486, "step": 5609 }, { "epoch": 1.4198936977980259, "grad_norm": 0.15855009853839874, "learning_rate": 1.2491119401045543e-05, "loss": 0.4782, "step": 5610 }, { "epoch": 1.4201467982789167, "grad_norm": 0.15599636733531952, "learning_rate": 1.2488800487350501e-05, "loss": 0.4782, "step": 5611 }, { "epoch": 1.4203998987598077, "grad_norm": 0.1628798544406891, "learning_rate": 1.2486481430977807e-05, "loss": 0.5028, "step": 5612 }, { "epoch": 1.4206529992406987, "grad_norm": 0.16032668948173523, "learning_rate": 1.2484162232060405e-05, "loss": 0.475, "step": 5613 }, { "epoch": 1.4209060997215894, "grad_norm": 0.15988194942474365, "learning_rate": 1.2481842890731251e-05, "loss": 0.4848, "step": 5614 }, { "epoch": 1.4211592002024804, "grad_norm": 0.16368697583675385, "learning_rate": 1.247952340712331e-05, "loss": 0.4958, "step": 5615 }, { "epoch": 1.4214123006833712, "grad_norm": 0.15838927030563354, "learning_rate": 1.2477203781369552e-05, "loss": 0.5006, "step": 5616 }, { "epoch": 1.4216654011642622, "grad_norm": 0.5066993236541748, "learning_rate": 1.2474884013602955e-05, "loss": 0.4896, "step": 5617 }, { "epoch": 1.4219185016451532, "grad_norm": 0.16188634932041168, "learning_rate": 1.247256410395651e-05, "loss": 0.4937, "step": 5618 }, { "epoch": 1.422171602126044, "grad_norm": 0.1627260148525238, "learning_rate": 1.2470244052563206e-05, "loss": 0.4839, "step": 5619 }, { "epoch": 1.422424702606935, "grad_norm": 0.16570504009723663, "learning_rate": 1.2467923859556052e-05, "loss": 0.5029, "step": 5620 }, { "epoch": 1.4226778030878258, "grad_norm": 0.15819938480854034, "learning_rate": 1.246560352506806e-05, "loss": 0.4837, "step": 5621 }, { "epoch": 1.4229309035687168, "grad_norm": 0.16796299815177917, "learning_rate": 1.2463283049232245e-05, "loss": 0.487, "step": 5622 }, { "epoch": 1.4231840040496078, "grad_norm": 0.16752269864082336, "learning_rate": 1.246096243218164e-05, "loss": 0.4747, "step": 5623 }, { "epoch": 1.4234371045304985, "grad_norm": 0.16382841765880585, "learning_rate": 1.2458641674049278e-05, "loss": 0.4868, "step": 5624 }, { "epoch": 1.4236902050113895, "grad_norm": 0.1659468412399292, "learning_rate": 1.2456320774968205e-05, "loss": 0.5115, "step": 5625 }, { "epoch": 1.4239433054922803, "grad_norm": 0.16353927552700043, "learning_rate": 1.2453999735071472e-05, "loss": 0.4688, "step": 5626 }, { "epoch": 1.4241964059731713, "grad_norm": 0.16477906703948975, "learning_rate": 1.2451678554492141e-05, "loss": 0.4912, "step": 5627 }, { "epoch": 1.4244495064540623, "grad_norm": 0.16429002583026886, "learning_rate": 1.2449357233363278e-05, "loss": 0.5191, "step": 5628 }, { "epoch": 1.424702606934953, "grad_norm": 0.16305558383464813, "learning_rate": 1.2447035771817958e-05, "loss": 0.4877, "step": 5629 }, { "epoch": 1.424955707415844, "grad_norm": 0.18495027720928192, "learning_rate": 1.2444714169989273e-05, "loss": 0.4966, "step": 5630 }, { "epoch": 1.4252088078967349, "grad_norm": 0.17920176684856415, "learning_rate": 1.2442392428010308e-05, "loss": 0.4721, "step": 5631 }, { "epoch": 1.4254619083776259, "grad_norm": 0.16083073616027832, "learning_rate": 1.2440070546014168e-05, "loss": 0.4737, "step": 5632 }, { "epoch": 1.4257150088585169, "grad_norm": 0.15409302711486816, "learning_rate": 1.2437748524133955e-05, "loss": 0.4631, "step": 5633 }, { "epoch": 1.4259681093394079, "grad_norm": 0.15540309250354767, "learning_rate": 1.2435426362502794e-05, "loss": 0.4934, "step": 5634 }, { "epoch": 1.4262212098202987, "grad_norm": 0.16040171682834625, "learning_rate": 1.2433104061253803e-05, "loss": 0.478, "step": 5635 }, { "epoch": 1.4264743103011897, "grad_norm": 0.15538759529590607, "learning_rate": 1.2430781620520117e-05, "loss": 0.4928, "step": 5636 }, { "epoch": 1.4267274107820804, "grad_norm": 0.16305993497371674, "learning_rate": 1.2428459040434882e-05, "loss": 0.4795, "step": 5637 }, { "epoch": 1.4269805112629714, "grad_norm": 0.16755643486976624, "learning_rate": 1.2426136321131237e-05, "loss": 0.5086, "step": 5638 }, { "epoch": 1.4272336117438624, "grad_norm": 0.16908270120620728, "learning_rate": 1.2423813462742344e-05, "loss": 0.4923, "step": 5639 }, { "epoch": 1.4274867122247532, "grad_norm": 0.16163389384746552, "learning_rate": 1.2421490465401367e-05, "loss": 0.4883, "step": 5640 }, { "epoch": 1.4277398127056442, "grad_norm": 0.1603579968214035, "learning_rate": 1.2419167329241479e-05, "loss": 0.4875, "step": 5641 }, { "epoch": 1.427992913186535, "grad_norm": 0.16223473846912384, "learning_rate": 1.2416844054395858e-05, "loss": 0.4839, "step": 5642 }, { "epoch": 1.428246013667426, "grad_norm": 0.15983036160469055, "learning_rate": 1.2414520640997698e-05, "loss": 0.5039, "step": 5643 }, { "epoch": 1.428499114148317, "grad_norm": 0.16288568079471588, "learning_rate": 1.2412197089180187e-05, "loss": 0.5098, "step": 5644 }, { "epoch": 1.4287522146292078, "grad_norm": 0.1601967215538025, "learning_rate": 1.2409873399076533e-05, "loss": 0.4704, "step": 5645 }, { "epoch": 1.4290053151100988, "grad_norm": 0.1627446711063385, "learning_rate": 1.2407549570819955e-05, "loss": 0.5101, "step": 5646 }, { "epoch": 1.4292584155909895, "grad_norm": 0.1615554690361023, "learning_rate": 1.240522560454366e-05, "loss": 0.5033, "step": 5647 }, { "epoch": 1.4295115160718805, "grad_norm": 0.1589273065328598, "learning_rate": 1.2402901500380886e-05, "loss": 0.4868, "step": 5648 }, { "epoch": 1.4297646165527715, "grad_norm": 0.1594519317150116, "learning_rate": 1.240057725846487e-05, "loss": 0.4649, "step": 5649 }, { "epoch": 1.4300177170336623, "grad_norm": 0.1875002235174179, "learning_rate": 1.2398252878928849e-05, "loss": 0.4964, "step": 5650 }, { "epoch": 1.4302708175145533, "grad_norm": 0.16644340753555298, "learning_rate": 1.2395928361906073e-05, "loss": 0.4775, "step": 5651 }, { "epoch": 1.430523917995444, "grad_norm": 0.15583786368370056, "learning_rate": 1.2393603707529813e-05, "loss": 0.4673, "step": 5652 }, { "epoch": 1.430777018476335, "grad_norm": 0.16103693842887878, "learning_rate": 1.2391278915933329e-05, "loss": 0.4846, "step": 5653 }, { "epoch": 1.431030118957226, "grad_norm": 0.16082973778247833, "learning_rate": 1.2388953987249893e-05, "loss": 0.4723, "step": 5654 }, { "epoch": 1.4312832194381169, "grad_norm": 0.16561463475227356, "learning_rate": 1.2386628921612798e-05, "loss": 0.4832, "step": 5655 }, { "epoch": 1.4315363199190079, "grad_norm": 0.17037402093410492, "learning_rate": 1.2384303719155325e-05, "loss": 0.4914, "step": 5656 }, { "epoch": 1.4317894203998986, "grad_norm": 0.18051870167255402, "learning_rate": 1.238197838001078e-05, "loss": 0.5032, "step": 5657 }, { "epoch": 1.4320425208807896, "grad_norm": 0.16306555271148682, "learning_rate": 1.2379652904312468e-05, "loss": 0.4947, "step": 5658 }, { "epoch": 1.4322956213616806, "grad_norm": 0.17089863121509552, "learning_rate": 1.23773272921937e-05, "loss": 0.502, "step": 5659 }, { "epoch": 1.4325487218425714, "grad_norm": 0.16766834259033203, "learning_rate": 1.2375001543787802e-05, "loss": 0.4723, "step": 5660 }, { "epoch": 1.4328018223234624, "grad_norm": 0.15748408436775208, "learning_rate": 1.2372675659228103e-05, "loss": 0.4807, "step": 5661 }, { "epoch": 1.4330549228043532, "grad_norm": 0.15210938453674316, "learning_rate": 1.2370349638647943e-05, "loss": 0.4868, "step": 5662 }, { "epoch": 1.4333080232852442, "grad_norm": 0.15987281501293182, "learning_rate": 1.2368023482180666e-05, "loss": 0.4907, "step": 5663 }, { "epoch": 1.4335611237661352, "grad_norm": 0.1678691953420639, "learning_rate": 1.2365697189959625e-05, "loss": 0.4797, "step": 5664 }, { "epoch": 1.4338142242470262, "grad_norm": 0.1627076119184494, "learning_rate": 1.2363370762118183e-05, "loss": 0.5074, "step": 5665 }, { "epoch": 1.434067324727917, "grad_norm": 0.1571960151195526, "learning_rate": 1.236104419878971e-05, "loss": 0.4612, "step": 5666 }, { "epoch": 1.4343204252088078, "grad_norm": 0.16643640398979187, "learning_rate": 1.2358717500107577e-05, "loss": 0.4808, "step": 5667 }, { "epoch": 1.4345735256896988, "grad_norm": 0.1591946929693222, "learning_rate": 1.235639066620518e-05, "loss": 0.4923, "step": 5668 }, { "epoch": 1.4348266261705898, "grad_norm": 0.15949590504169464, "learning_rate": 1.2354063697215902e-05, "loss": 0.4606, "step": 5669 }, { "epoch": 1.4350797266514808, "grad_norm": 0.16784417629241943, "learning_rate": 1.2351736593273141e-05, "loss": 0.4926, "step": 5670 }, { "epoch": 1.4353328271323715, "grad_norm": 0.16074731945991516, "learning_rate": 1.2349409354510316e-05, "loss": 0.4919, "step": 5671 }, { "epoch": 1.4355859276132625, "grad_norm": 0.16040168702602386, "learning_rate": 1.2347081981060831e-05, "loss": 0.45, "step": 5672 }, { "epoch": 1.4358390280941533, "grad_norm": 0.16325727105140686, "learning_rate": 1.2344754473058116e-05, "loss": 0.5075, "step": 5673 }, { "epoch": 1.4360921285750443, "grad_norm": 0.15660667419433594, "learning_rate": 1.2342426830635606e-05, "loss": 0.4722, "step": 5674 }, { "epoch": 1.4363452290559353, "grad_norm": 0.15977470576763153, "learning_rate": 1.2340099053926728e-05, "loss": 0.4773, "step": 5675 }, { "epoch": 1.436598329536826, "grad_norm": 0.16376148164272308, "learning_rate": 1.2337771143064937e-05, "loss": 0.4883, "step": 5676 }, { "epoch": 1.436851430017717, "grad_norm": 0.16356772184371948, "learning_rate": 1.2335443098183688e-05, "loss": 0.5031, "step": 5677 }, { "epoch": 1.4371045304986079, "grad_norm": 0.15946035087108612, "learning_rate": 1.2333114919416438e-05, "loss": 0.4889, "step": 5678 }, { "epoch": 1.4373576309794989, "grad_norm": 0.1618504524230957, "learning_rate": 1.233078660689666e-05, "loss": 0.4994, "step": 5679 }, { "epoch": 1.4376107314603899, "grad_norm": 0.1682477742433548, "learning_rate": 1.2328458160757828e-05, "loss": 0.495, "step": 5680 }, { "epoch": 1.4378638319412806, "grad_norm": 0.16214655339717865, "learning_rate": 1.232612958113343e-05, "loss": 0.4848, "step": 5681 }, { "epoch": 1.4381169324221716, "grad_norm": 0.1690962165594101, "learning_rate": 1.2323800868156958e-05, "loss": 0.4947, "step": 5682 }, { "epoch": 1.4383700329030624, "grad_norm": 0.16031542420387268, "learning_rate": 1.2321472021961912e-05, "loss": 0.492, "step": 5683 }, { "epoch": 1.4386231333839534, "grad_norm": 0.1599411517381668, "learning_rate": 1.2319143042681798e-05, "loss": 0.4777, "step": 5684 }, { "epoch": 1.4388762338648444, "grad_norm": 0.1667228490114212, "learning_rate": 1.2316813930450134e-05, "loss": 0.4885, "step": 5685 }, { "epoch": 1.4391293343457352, "grad_norm": 0.17535629868507385, "learning_rate": 1.2314484685400441e-05, "loss": 0.4952, "step": 5686 }, { "epoch": 1.4393824348266262, "grad_norm": 0.1671820431947708, "learning_rate": 1.2312155307666248e-05, "loss": 0.481, "step": 5687 }, { "epoch": 1.439635535307517, "grad_norm": 0.16566871106624603, "learning_rate": 1.2309825797381101e-05, "loss": 0.4744, "step": 5688 }, { "epoch": 1.439888635788408, "grad_norm": 0.16709978878498077, "learning_rate": 1.2307496154678537e-05, "loss": 0.5173, "step": 5689 }, { "epoch": 1.440141736269299, "grad_norm": 0.1609114110469818, "learning_rate": 1.2305166379692114e-05, "loss": 0.502, "step": 5690 }, { "epoch": 1.4403948367501898, "grad_norm": 0.1624011993408203, "learning_rate": 1.2302836472555396e-05, "loss": 0.4987, "step": 5691 }, { "epoch": 1.4406479372310808, "grad_norm": 0.1617092788219452, "learning_rate": 1.2300506433401943e-05, "loss": 0.4939, "step": 5692 }, { "epoch": 1.4409010377119715, "grad_norm": 0.15826572477817535, "learning_rate": 1.229817626236534e-05, "loss": 0.4657, "step": 5693 }, { "epoch": 1.4411541381928625, "grad_norm": 0.15680430829524994, "learning_rate": 1.2295845959579165e-05, "loss": 0.4855, "step": 5694 }, { "epoch": 1.4414072386737535, "grad_norm": 0.16816596686840057, "learning_rate": 1.2293515525177008e-05, "loss": 0.459, "step": 5695 }, { "epoch": 1.4416603391546445, "grad_norm": 0.1565219759941101, "learning_rate": 1.2291184959292477e-05, "loss": 0.4727, "step": 5696 }, { "epoch": 1.4419134396355353, "grad_norm": 0.16989880800247192, "learning_rate": 1.228885426205917e-05, "loss": 0.506, "step": 5697 }, { "epoch": 1.442166540116426, "grad_norm": 0.16051672399044037, "learning_rate": 1.2286523433610697e-05, "loss": 0.4739, "step": 5698 }, { "epoch": 1.442419640597317, "grad_norm": 0.1662341058254242, "learning_rate": 1.2284192474080696e-05, "loss": 0.4808, "step": 5699 }, { "epoch": 1.442672741078208, "grad_norm": 0.16468793153762817, "learning_rate": 1.228186138360278e-05, "loss": 0.5017, "step": 5700 }, { "epoch": 1.442925841559099, "grad_norm": 0.17244714498519897, "learning_rate": 1.2279530162310592e-05, "loss": 0.4933, "step": 5701 }, { "epoch": 1.4431789420399899, "grad_norm": 0.16408121585845947, "learning_rate": 1.2277198810337779e-05, "loss": 0.5061, "step": 5702 }, { "epoch": 1.4434320425208809, "grad_norm": 0.16633616387844086, "learning_rate": 1.2274867327817983e-05, "loss": 0.4816, "step": 5703 }, { "epoch": 1.4436851430017716, "grad_norm": 0.16217078268527985, "learning_rate": 1.2272535714884873e-05, "loss": 0.5107, "step": 5704 }, { "epoch": 1.4439382434826626, "grad_norm": 0.15724056959152222, "learning_rate": 1.2270203971672107e-05, "loss": 0.4814, "step": 5705 }, { "epoch": 1.4441913439635536, "grad_norm": 0.1669279932975769, "learning_rate": 1.2267872098313368e-05, "loss": 0.4998, "step": 5706 }, { "epoch": 1.4444444444444444, "grad_norm": 0.15723727643489838, "learning_rate": 1.2265540094942328e-05, "loss": 0.4822, "step": 5707 }, { "epoch": 1.4446975449253354, "grad_norm": 0.1633288860321045, "learning_rate": 1.2263207961692683e-05, "loss": 0.4966, "step": 5708 }, { "epoch": 1.4449506454062262, "grad_norm": 0.15358422696590424, "learning_rate": 1.2260875698698123e-05, "loss": 0.479, "step": 5709 }, { "epoch": 1.4452037458871172, "grad_norm": 0.16915802657604218, "learning_rate": 1.2258543306092356e-05, "loss": 0.4955, "step": 5710 }, { "epoch": 1.4454568463680082, "grad_norm": 0.1604975461959839, "learning_rate": 1.2256210784009095e-05, "loss": 0.4979, "step": 5711 }, { "epoch": 1.445709946848899, "grad_norm": 0.16567182540893555, "learning_rate": 1.2253878132582053e-05, "loss": 0.4757, "step": 5712 }, { "epoch": 1.44596304732979, "grad_norm": 0.1625775396823883, "learning_rate": 1.2251545351944962e-05, "loss": 0.4966, "step": 5713 }, { "epoch": 1.4462161478106808, "grad_norm": 0.16544294357299805, "learning_rate": 1.2249212442231549e-05, "loss": 0.4777, "step": 5714 }, { "epoch": 1.4464692482915718, "grad_norm": 0.16091321408748627, "learning_rate": 1.224687940357556e-05, "loss": 0.4945, "step": 5715 }, { "epoch": 1.4467223487724628, "grad_norm": 0.16844846308231354, "learning_rate": 1.224454623611074e-05, "loss": 0.4994, "step": 5716 }, { "epoch": 1.4469754492533535, "grad_norm": 0.1640080064535141, "learning_rate": 1.2242212939970841e-05, "loss": 0.5119, "step": 5717 }, { "epoch": 1.4472285497342445, "grad_norm": 0.16314604878425598, "learning_rate": 1.2239879515289636e-05, "loss": 0.4972, "step": 5718 }, { "epoch": 1.4474816502151353, "grad_norm": 0.1660699099302292, "learning_rate": 1.2237545962200889e-05, "loss": 0.4882, "step": 5719 }, { "epoch": 1.4477347506960263, "grad_norm": 0.165506511926651, "learning_rate": 1.2235212280838375e-05, "loss": 0.5016, "step": 5720 }, { "epoch": 1.4479878511769173, "grad_norm": 0.15843352675437927, "learning_rate": 1.2232878471335887e-05, "loss": 0.4763, "step": 5721 }, { "epoch": 1.448240951657808, "grad_norm": 0.16315391659736633, "learning_rate": 1.223054453382721e-05, "loss": 0.5175, "step": 5722 }, { "epoch": 1.448494052138699, "grad_norm": 0.16404522955417633, "learning_rate": 1.2228210468446147e-05, "loss": 0.4762, "step": 5723 }, { "epoch": 1.4487471526195899, "grad_norm": 0.1574731320142746, "learning_rate": 1.2225876275326506e-05, "loss": 0.4726, "step": 5724 }, { "epoch": 1.4490002531004809, "grad_norm": 0.15810896456241608, "learning_rate": 1.2223541954602098e-05, "loss": 0.4953, "step": 5725 }, { "epoch": 1.4492533535813719, "grad_norm": 0.16277159750461578, "learning_rate": 1.2221207506406747e-05, "loss": 0.4763, "step": 5726 }, { "epoch": 1.4495064540622626, "grad_norm": 0.16646313667297363, "learning_rate": 1.2218872930874278e-05, "loss": 0.4778, "step": 5727 }, { "epoch": 1.4497595545431536, "grad_norm": 0.16037623584270477, "learning_rate": 1.2216538228138537e-05, "loss": 0.4788, "step": 5728 }, { "epoch": 1.4500126550240444, "grad_norm": 0.16235554218292236, "learning_rate": 1.2214203398333358e-05, "loss": 0.5112, "step": 5729 }, { "epoch": 1.4502657555049354, "grad_norm": 0.1622912883758545, "learning_rate": 1.2211868441592594e-05, "loss": 0.5012, "step": 5730 }, { "epoch": 1.4505188559858264, "grad_norm": 0.16089405119419098, "learning_rate": 1.2209533358050104e-05, "loss": 0.4794, "step": 5731 }, { "epoch": 1.4507719564667174, "grad_norm": 0.16449938714504242, "learning_rate": 1.2207198147839757e-05, "loss": 0.4866, "step": 5732 }, { "epoch": 1.4510250569476082, "grad_norm": 0.17046216130256653, "learning_rate": 1.2204862811095416e-05, "loss": 0.455, "step": 5733 }, { "epoch": 1.4512781574284992, "grad_norm": 0.16040672361850739, "learning_rate": 1.2202527347950971e-05, "loss": 0.4885, "step": 5734 }, { "epoch": 1.45153125790939, "grad_norm": 0.1650368720293045, "learning_rate": 1.220019175854031e-05, "loss": 0.4941, "step": 5735 }, { "epoch": 1.451784358390281, "grad_norm": 0.16258086264133453, "learning_rate": 1.2197856042997314e-05, "loss": 0.4998, "step": 5736 }, { "epoch": 1.452037458871172, "grad_norm": 0.16171053051948547, "learning_rate": 1.2195520201455897e-05, "loss": 0.4925, "step": 5737 }, { "epoch": 1.4522905593520627, "grad_norm": 0.16417957842350006, "learning_rate": 1.2193184234049965e-05, "loss": 0.4745, "step": 5738 }, { "epoch": 1.4525436598329537, "grad_norm": 0.16181319952011108, "learning_rate": 1.2190848140913431e-05, "loss": 0.4953, "step": 5739 }, { "epoch": 1.4527967603138445, "grad_norm": 0.16047199070453644, "learning_rate": 1.2188511922180223e-05, "loss": 0.4742, "step": 5740 }, { "epoch": 1.4530498607947355, "grad_norm": 0.17332276701927185, "learning_rate": 1.218617557798427e-05, "loss": 0.51, "step": 5741 }, { "epoch": 1.4533029612756265, "grad_norm": 0.1590789407491684, "learning_rate": 1.2183839108459503e-05, "loss": 0.488, "step": 5742 }, { "epoch": 1.4535560617565173, "grad_norm": 0.15866723656654358, "learning_rate": 1.2181502513739876e-05, "loss": 0.4917, "step": 5743 }, { "epoch": 1.4538091622374083, "grad_norm": 0.16042472422122955, "learning_rate": 1.2179165793959338e-05, "loss": 0.4541, "step": 5744 }, { "epoch": 1.454062262718299, "grad_norm": 0.15654593706130981, "learning_rate": 1.2176828949251847e-05, "loss": 0.4695, "step": 5745 }, { "epoch": 1.45431536319919, "grad_norm": 0.1601811945438385, "learning_rate": 1.2174491979751369e-05, "loss": 0.4885, "step": 5746 }, { "epoch": 1.454568463680081, "grad_norm": 0.1541527658700943, "learning_rate": 1.2172154885591879e-05, "loss": 0.4777, "step": 5747 }, { "epoch": 1.4548215641609719, "grad_norm": 0.16181185841560364, "learning_rate": 1.2169817666907356e-05, "loss": 0.5046, "step": 5748 }, { "epoch": 1.4550746646418629, "grad_norm": 0.15510854125022888, "learning_rate": 1.2167480323831789e-05, "loss": 0.4774, "step": 5749 }, { "epoch": 1.4553277651227536, "grad_norm": 0.16748224198818207, "learning_rate": 1.2165142856499172e-05, "loss": 0.5081, "step": 5750 }, { "epoch": 1.4555808656036446, "grad_norm": 0.1522534042596817, "learning_rate": 1.2162805265043511e-05, "loss": 0.4503, "step": 5751 }, { "epoch": 1.4558339660845356, "grad_norm": 0.16045406460762024, "learning_rate": 1.216046754959881e-05, "loss": 0.4984, "step": 5752 }, { "epoch": 1.4560870665654264, "grad_norm": 0.16160793602466583, "learning_rate": 1.2158129710299087e-05, "loss": 0.4588, "step": 5753 }, { "epoch": 1.4563401670463174, "grad_norm": 0.16327039897441864, "learning_rate": 1.2155791747278367e-05, "loss": 0.4872, "step": 5754 }, { "epoch": 1.4565932675272082, "grad_norm": 0.16436593234539032, "learning_rate": 1.2153453660670677e-05, "loss": 0.4989, "step": 5755 }, { "epoch": 1.4568463680080992, "grad_norm": 0.16307589411735535, "learning_rate": 1.2151115450610058e-05, "loss": 0.5121, "step": 5756 }, { "epoch": 1.4570994684889902, "grad_norm": 0.15641282498836517, "learning_rate": 1.2148777117230554e-05, "loss": 0.4871, "step": 5757 }, { "epoch": 1.457352568969881, "grad_norm": 0.15372627973556519, "learning_rate": 1.2146438660666214e-05, "loss": 0.4702, "step": 5758 }, { "epoch": 1.457605669450772, "grad_norm": 0.16509738564491272, "learning_rate": 1.21441000810511e-05, "loss": 0.4773, "step": 5759 }, { "epoch": 1.4578587699316627, "grad_norm": 0.19046425819396973, "learning_rate": 1.214176137851928e-05, "loss": 0.4993, "step": 5760 }, { "epoch": 1.4581118704125537, "grad_norm": 0.16468371450901031, "learning_rate": 1.2139422553204815e-05, "loss": 0.4716, "step": 5761 }, { "epoch": 1.4583649708934447, "grad_norm": 0.16200865805149078, "learning_rate": 1.2137083605241797e-05, "loss": 0.4794, "step": 5762 }, { "epoch": 1.4586180713743357, "grad_norm": 0.16435088217258453, "learning_rate": 1.2134744534764313e-05, "loss": 0.4871, "step": 5763 }, { "epoch": 1.4588711718552265, "grad_norm": 0.16604717075824738, "learning_rate": 1.2132405341906448e-05, "loss": 0.4965, "step": 5764 }, { "epoch": 1.4591242723361173, "grad_norm": 0.15929819643497467, "learning_rate": 1.2130066026802312e-05, "loss": 0.4726, "step": 5765 }, { "epoch": 1.4593773728170083, "grad_norm": 0.16344885528087616, "learning_rate": 1.212772658958601e-05, "loss": 0.5149, "step": 5766 }, { "epoch": 1.4596304732978993, "grad_norm": 0.16432452201843262, "learning_rate": 1.212538703039165e-05, "loss": 0.4976, "step": 5767 }, { "epoch": 1.4598835737787903, "grad_norm": 0.16266052424907684, "learning_rate": 1.2123047349353365e-05, "loss": 0.5061, "step": 5768 }, { "epoch": 1.460136674259681, "grad_norm": 0.16674935817718506, "learning_rate": 1.2120707546605277e-05, "loss": 0.4757, "step": 5769 }, { "epoch": 1.460389774740572, "grad_norm": 0.1631499081850052, "learning_rate": 1.2118367622281524e-05, "loss": 0.4928, "step": 5770 }, { "epoch": 1.4606428752214629, "grad_norm": 0.16090358793735504, "learning_rate": 1.211602757651625e-05, "loss": 0.497, "step": 5771 }, { "epoch": 1.4608959757023539, "grad_norm": 0.15965373814105988, "learning_rate": 1.2113687409443602e-05, "loss": 0.4786, "step": 5772 }, { "epoch": 1.4611490761832449, "grad_norm": 0.16418369114398956, "learning_rate": 1.2111347121197739e-05, "loss": 0.4997, "step": 5773 }, { "epoch": 1.4614021766641356, "grad_norm": 0.1573558896780014, "learning_rate": 1.2109006711912824e-05, "loss": 0.5037, "step": 5774 }, { "epoch": 1.4616552771450266, "grad_norm": 0.1616630107164383, "learning_rate": 1.2106666181723029e-05, "loss": 0.5122, "step": 5775 }, { "epoch": 1.4619083776259174, "grad_norm": 0.16610005497932434, "learning_rate": 1.210432553076253e-05, "loss": 0.4807, "step": 5776 }, { "epoch": 1.4621614781068084, "grad_norm": 0.15880464017391205, "learning_rate": 1.210198475916551e-05, "loss": 0.4942, "step": 5777 }, { "epoch": 1.4624145785876994, "grad_norm": 0.16382747888565063, "learning_rate": 1.2099643867066164e-05, "loss": 0.4895, "step": 5778 }, { "epoch": 1.4626676790685902, "grad_norm": 0.169754296541214, "learning_rate": 1.2097302854598694e-05, "loss": 0.4559, "step": 5779 }, { "epoch": 1.4629207795494812, "grad_norm": 0.15969692170619965, "learning_rate": 1.2094961721897294e-05, "loss": 0.4784, "step": 5780 }, { "epoch": 1.463173880030372, "grad_norm": 0.16562888026237488, "learning_rate": 1.2092620469096184e-05, "loss": 0.4792, "step": 5781 }, { "epoch": 1.463426980511263, "grad_norm": 0.16463537514209747, "learning_rate": 1.2090279096329582e-05, "loss": 0.4867, "step": 5782 }, { "epoch": 1.463680080992154, "grad_norm": 0.1661299467086792, "learning_rate": 1.2087937603731711e-05, "loss": 0.4851, "step": 5783 }, { "epoch": 1.4639331814730447, "grad_norm": 0.1643810272216797, "learning_rate": 1.2085595991436808e-05, "loss": 0.4893, "step": 5784 }, { "epoch": 1.4641862819539357, "grad_norm": 0.16080287098884583, "learning_rate": 1.2083254259579114e-05, "loss": 0.5018, "step": 5785 }, { "epoch": 1.4644393824348265, "grad_norm": 0.1556382179260254, "learning_rate": 1.2080912408292867e-05, "loss": 0.471, "step": 5786 }, { "epoch": 1.4646924829157175, "grad_norm": 0.16896569728851318, "learning_rate": 1.2078570437712327e-05, "loss": 0.4855, "step": 5787 }, { "epoch": 1.4649455833966085, "grad_norm": 0.16228081285953522, "learning_rate": 1.2076228347971756e-05, "loss": 0.4615, "step": 5788 }, { "epoch": 1.4651986838774993, "grad_norm": 0.1639234721660614, "learning_rate": 1.2073886139205418e-05, "loss": 0.5035, "step": 5789 }, { "epoch": 1.4654517843583903, "grad_norm": 0.16062140464782715, "learning_rate": 1.2071543811547583e-05, "loss": 0.4884, "step": 5790 }, { "epoch": 1.465704884839281, "grad_norm": 0.16014496982097626, "learning_rate": 1.206920136513254e-05, "loss": 0.4783, "step": 5791 }, { "epoch": 1.465957985320172, "grad_norm": 0.17266565561294556, "learning_rate": 1.2066858800094567e-05, "loss": 0.5039, "step": 5792 }, { "epoch": 1.466211085801063, "grad_norm": 0.16516320407390594, "learning_rate": 1.2064516116567968e-05, "loss": 0.5106, "step": 5793 }, { "epoch": 1.466464186281954, "grad_norm": 0.15866327285766602, "learning_rate": 1.2062173314687035e-05, "loss": 0.4818, "step": 5794 }, { "epoch": 1.4667172867628449, "grad_norm": 0.16338694095611572, "learning_rate": 1.2059830394586083e-05, "loss": 0.4787, "step": 5795 }, { "epoch": 1.4669703872437356, "grad_norm": 0.16666264832019806, "learning_rate": 1.2057487356399425e-05, "loss": 0.4751, "step": 5796 }, { "epoch": 1.4672234877246266, "grad_norm": 0.16126389801502228, "learning_rate": 1.205514420026138e-05, "loss": 0.4527, "step": 5797 }, { "epoch": 1.4674765882055176, "grad_norm": 0.16665463149547577, "learning_rate": 1.2052800926306275e-05, "loss": 0.4865, "step": 5798 }, { "epoch": 1.4677296886864086, "grad_norm": 0.17032888531684875, "learning_rate": 1.205045753466845e-05, "loss": 0.5025, "step": 5799 }, { "epoch": 1.4679827891672994, "grad_norm": 0.1583305299282074, "learning_rate": 1.2048114025482243e-05, "loss": 0.4825, "step": 5800 }, { "epoch": 1.4682358896481904, "grad_norm": 0.1614544689655304, "learning_rate": 1.2045770398882006e-05, "loss": 0.4716, "step": 5801 }, { "epoch": 1.4684889901290812, "grad_norm": 0.16106119751930237, "learning_rate": 1.204342665500209e-05, "loss": 0.4687, "step": 5802 }, { "epoch": 1.4687420906099722, "grad_norm": 0.1640172153711319, "learning_rate": 1.2041082793976856e-05, "loss": 0.4733, "step": 5803 }, { "epoch": 1.4689951910908632, "grad_norm": 0.15972398221492767, "learning_rate": 1.2038738815940678e-05, "loss": 0.5034, "step": 5804 }, { "epoch": 1.469248291571754, "grad_norm": 0.16015274822711945, "learning_rate": 1.2036394721027927e-05, "loss": 0.4329, "step": 5805 }, { "epoch": 1.469501392052645, "grad_norm": 0.15918336808681488, "learning_rate": 1.2034050509372986e-05, "loss": 0.4912, "step": 5806 }, { "epoch": 1.4697544925335357, "grad_norm": 0.165705144405365, "learning_rate": 1.2031706181110247e-05, "loss": 0.4734, "step": 5807 }, { "epoch": 1.4700075930144267, "grad_norm": 0.1591646671295166, "learning_rate": 1.2029361736374099e-05, "loss": 0.4829, "step": 5808 }, { "epoch": 1.4702606934953177, "grad_norm": 0.1582026332616806, "learning_rate": 1.2027017175298947e-05, "loss": 0.4526, "step": 5809 }, { "epoch": 1.4705137939762085, "grad_norm": 0.16407866775989532, "learning_rate": 1.2024672498019205e-05, "loss": 0.4862, "step": 5810 }, { "epoch": 1.4707668944570995, "grad_norm": 0.15521539747714996, "learning_rate": 1.2022327704669281e-05, "loss": 0.4811, "step": 5811 }, { "epoch": 1.4710199949379903, "grad_norm": 0.1658594310283661, "learning_rate": 1.2019982795383598e-05, "loss": 0.5191, "step": 5812 }, { "epoch": 1.4712730954188813, "grad_norm": 0.1591573804616928, "learning_rate": 1.2017637770296586e-05, "loss": 0.4629, "step": 5813 }, { "epoch": 1.4715261958997723, "grad_norm": 0.15894004702568054, "learning_rate": 1.2015292629542682e-05, "loss": 0.5104, "step": 5814 }, { "epoch": 1.471779296380663, "grad_norm": 0.1631624698638916, "learning_rate": 1.2012947373256325e-05, "loss": 0.4893, "step": 5815 }, { "epoch": 1.472032396861554, "grad_norm": 0.16077227890491486, "learning_rate": 1.2010602001571966e-05, "loss": 0.466, "step": 5816 }, { "epoch": 1.4722854973424448, "grad_norm": 0.16395552456378937, "learning_rate": 1.2008256514624062e-05, "loss": 0.4817, "step": 5817 }, { "epoch": 1.4725385978233358, "grad_norm": 0.16444537043571472, "learning_rate": 1.2005910912547067e-05, "loss": 0.4829, "step": 5818 }, { "epoch": 1.4727916983042268, "grad_norm": 0.165321484208107, "learning_rate": 1.2003565195475457e-05, "loss": 0.4987, "step": 5819 }, { "epoch": 1.4730447987851176, "grad_norm": 0.1648806929588318, "learning_rate": 1.2001219363543703e-05, "loss": 0.4871, "step": 5820 }, { "epoch": 1.4732978992660086, "grad_norm": 0.1967431753873825, "learning_rate": 1.199887341688629e-05, "loss": 0.4712, "step": 5821 }, { "epoch": 1.4735509997468994, "grad_norm": 0.1697954386472702, "learning_rate": 1.1996527355637704e-05, "loss": 0.495, "step": 5822 }, { "epoch": 1.4738041002277904, "grad_norm": 0.16169969737529755, "learning_rate": 1.1994181179932438e-05, "loss": 0.4824, "step": 5823 }, { "epoch": 1.4740572007086814, "grad_norm": 0.17577211558818817, "learning_rate": 1.1991834889904997e-05, "loss": 0.4664, "step": 5824 }, { "epoch": 1.4743103011895722, "grad_norm": 0.16793079674243927, "learning_rate": 1.1989488485689886e-05, "loss": 0.5006, "step": 5825 }, { "epoch": 1.4745634016704632, "grad_norm": 0.16746817529201508, "learning_rate": 1.1987141967421623e-05, "loss": 0.4778, "step": 5826 }, { "epoch": 1.474816502151354, "grad_norm": 0.16777007281780243, "learning_rate": 1.1984795335234721e-05, "loss": 0.481, "step": 5827 }, { "epoch": 1.475069602632245, "grad_norm": 0.16335664689540863, "learning_rate": 1.1982448589263715e-05, "loss": 0.4861, "step": 5828 }, { "epoch": 1.475322703113136, "grad_norm": 0.16473783552646637, "learning_rate": 1.1980101729643139e-05, "loss": 0.4615, "step": 5829 }, { "epoch": 1.475575803594027, "grad_norm": 0.16483576595783234, "learning_rate": 1.1977754756507527e-05, "loss": 0.5116, "step": 5830 }, { "epoch": 1.4758289040749177, "grad_norm": 0.15921412408351898, "learning_rate": 1.1975407669991433e-05, "loss": 0.5147, "step": 5831 }, { "epoch": 1.4760820045558087, "grad_norm": 0.1640428900718689, "learning_rate": 1.1973060470229407e-05, "loss": 0.508, "step": 5832 }, { "epoch": 1.4763351050366995, "grad_norm": 0.15973317623138428, "learning_rate": 1.197071315735601e-05, "loss": 0.4769, "step": 5833 }, { "epoch": 1.4765882055175905, "grad_norm": 0.17910632491111755, "learning_rate": 1.1968365731505805e-05, "loss": 0.5212, "step": 5834 }, { "epoch": 1.4768413059984815, "grad_norm": 0.16700610518455505, "learning_rate": 1.1966018192813374e-05, "loss": 0.4773, "step": 5835 }, { "epoch": 1.4770944064793723, "grad_norm": 0.15630199015140533, "learning_rate": 1.1963670541413285e-05, "loss": 0.4612, "step": 5836 }, { "epoch": 1.4773475069602633, "grad_norm": 0.16526365280151367, "learning_rate": 1.1961322777440131e-05, "loss": 0.4918, "step": 5837 }, { "epoch": 1.477600607441154, "grad_norm": 0.16500070691108704, "learning_rate": 1.1958974901028503e-05, "loss": 0.4979, "step": 5838 }, { "epoch": 1.477853707922045, "grad_norm": 0.16655465960502625, "learning_rate": 1.1956626912312998e-05, "loss": 0.4708, "step": 5839 }, { "epoch": 1.478106808402936, "grad_norm": 0.15939493477344513, "learning_rate": 1.1954278811428224e-05, "loss": 0.4731, "step": 5840 }, { "epoch": 1.4783599088838268, "grad_norm": 0.15629541873931885, "learning_rate": 1.1951930598508792e-05, "loss": 0.4679, "step": 5841 }, { "epoch": 1.4786130093647178, "grad_norm": 0.16339796781539917, "learning_rate": 1.1949582273689317e-05, "loss": 0.4795, "step": 5842 }, { "epoch": 1.4788661098456086, "grad_norm": 0.16105616092681885, "learning_rate": 1.1947233837104427e-05, "loss": 0.4907, "step": 5843 }, { "epoch": 1.4791192103264996, "grad_norm": 0.1675543338060379, "learning_rate": 1.1944885288888752e-05, "loss": 0.4821, "step": 5844 }, { "epoch": 1.4793723108073906, "grad_norm": 0.16225238144397736, "learning_rate": 1.1942536629176928e-05, "loss": 0.4848, "step": 5845 }, { "epoch": 1.4796254112882814, "grad_norm": 0.1653982549905777, "learning_rate": 1.1940187858103601e-05, "loss": 0.4959, "step": 5846 }, { "epoch": 1.4798785117691724, "grad_norm": 0.1801934838294983, "learning_rate": 1.1937838975803417e-05, "loss": 0.4869, "step": 5847 }, { "epoch": 1.4801316122500632, "grad_norm": 0.16900648176670074, "learning_rate": 1.193548998241104e-05, "loss": 0.4827, "step": 5848 }, { "epoch": 1.4803847127309542, "grad_norm": 0.15989217162132263, "learning_rate": 1.1933140878061123e-05, "loss": 0.4861, "step": 5849 }, { "epoch": 1.4806378132118452, "grad_norm": 0.1617344617843628, "learning_rate": 1.1930791662888343e-05, "loss": 0.499, "step": 5850 }, { "epoch": 1.480890913692736, "grad_norm": 0.16289415955543518, "learning_rate": 1.1928442337027373e-05, "loss": 0.4719, "step": 5851 }, { "epoch": 1.481144014173627, "grad_norm": 0.1640748679637909, "learning_rate": 1.1926092900612893e-05, "loss": 0.5123, "step": 5852 }, { "epoch": 1.4813971146545177, "grad_norm": 0.1614280492067337, "learning_rate": 1.192374335377959e-05, "loss": 0.4797, "step": 5853 }, { "epoch": 1.4816502151354087, "grad_norm": 0.16461366415023804, "learning_rate": 1.1921393696662166e-05, "loss": 0.5039, "step": 5854 }, { "epoch": 1.4819033156162997, "grad_norm": 0.15998253226280212, "learning_rate": 1.1919043929395317e-05, "loss": 0.4702, "step": 5855 }, { "epoch": 1.4821564160971905, "grad_norm": 0.1576930731534958, "learning_rate": 1.1916694052113747e-05, "loss": 0.4971, "step": 5856 }, { "epoch": 1.4824095165780815, "grad_norm": 0.17838329076766968, "learning_rate": 1.1914344064952176e-05, "loss": 0.493, "step": 5857 }, { "epoch": 1.4826626170589723, "grad_norm": 0.16263729333877563, "learning_rate": 1.1911993968045319e-05, "loss": 0.4734, "step": 5858 }, { "epoch": 1.4829157175398633, "grad_norm": 0.15968771278858185, "learning_rate": 1.19096437615279e-05, "loss": 0.4705, "step": 5859 }, { "epoch": 1.4831688180207543, "grad_norm": 0.16493210196495056, "learning_rate": 1.1907293445534663e-05, "loss": 0.4605, "step": 5860 }, { "epoch": 1.4834219185016453, "grad_norm": 0.16574108600616455, "learning_rate": 1.1904943020200333e-05, "loss": 0.4667, "step": 5861 }, { "epoch": 1.483675018982536, "grad_norm": 0.16704745590686798, "learning_rate": 1.1902592485659662e-05, "loss": 0.4901, "step": 5862 }, { "epoch": 1.4839281194634268, "grad_norm": 0.1580095887184143, "learning_rate": 1.19002418420474e-05, "loss": 0.4996, "step": 5863 }, { "epoch": 1.4841812199443178, "grad_norm": 0.15958534181118011, "learning_rate": 1.1897891089498303e-05, "loss": 0.4488, "step": 5864 }, { "epoch": 1.4844343204252088, "grad_norm": 0.16397030651569366, "learning_rate": 1.1895540228147136e-05, "loss": 0.501, "step": 5865 }, { "epoch": 1.4846874209060998, "grad_norm": 0.16115999221801758, "learning_rate": 1.1893189258128667e-05, "loss": 0.4935, "step": 5866 }, { "epoch": 1.4849405213869906, "grad_norm": 0.15969108045101166, "learning_rate": 1.1890838179577677e-05, "loss": 0.4595, "step": 5867 }, { "epoch": 1.4851936218678816, "grad_norm": 0.1616068184375763, "learning_rate": 1.1888486992628942e-05, "loss": 0.4833, "step": 5868 }, { "epoch": 1.4854467223487724, "grad_norm": 0.15777815878391266, "learning_rate": 1.1886135697417256e-05, "loss": 0.4715, "step": 5869 }, { "epoch": 1.4856998228296634, "grad_norm": 0.16814321279525757, "learning_rate": 1.1883784294077412e-05, "loss": 0.5068, "step": 5870 }, { "epoch": 1.4859529233105544, "grad_norm": 0.1620745062828064, "learning_rate": 1.1881432782744209e-05, "loss": 0.4964, "step": 5871 }, { "epoch": 1.4862060237914452, "grad_norm": 0.16326087713241577, "learning_rate": 1.1879081163552454e-05, "loss": 0.4735, "step": 5872 }, { "epoch": 1.4864591242723362, "grad_norm": 0.16050764918327332, "learning_rate": 1.1876729436636967e-05, "loss": 0.4897, "step": 5873 }, { "epoch": 1.486712224753227, "grad_norm": 0.16221944987773895, "learning_rate": 1.1874377602132559e-05, "loss": 0.5063, "step": 5874 }, { "epoch": 1.486965325234118, "grad_norm": 0.1585109978914261, "learning_rate": 1.1872025660174056e-05, "loss": 0.4819, "step": 5875 }, { "epoch": 1.487218425715009, "grad_norm": 0.1590246707201004, "learning_rate": 1.1869673610896298e-05, "loss": 0.4855, "step": 5876 }, { "epoch": 1.4874715261958997, "grad_norm": 0.16256093978881836, "learning_rate": 1.1867321454434115e-05, "loss": 0.485, "step": 5877 }, { "epoch": 1.4877246266767907, "grad_norm": 0.16388507187366486, "learning_rate": 1.1864969190922354e-05, "loss": 0.4805, "step": 5878 }, { "epoch": 1.4879777271576815, "grad_norm": 0.17410242557525635, "learning_rate": 1.1862616820495867e-05, "loss": 0.5026, "step": 5879 }, { "epoch": 1.4882308276385725, "grad_norm": 0.17078374326229095, "learning_rate": 1.186026434328951e-05, "loss": 0.5012, "step": 5880 }, { "epoch": 1.4884839281194635, "grad_norm": 0.15763680636882782, "learning_rate": 1.1857911759438138e-05, "loss": 0.4864, "step": 5881 }, { "epoch": 1.4887370286003543, "grad_norm": 0.16393430531024933, "learning_rate": 1.1855559069076631e-05, "loss": 0.4521, "step": 5882 }, { "epoch": 1.4889901290812453, "grad_norm": 0.1699652224779129, "learning_rate": 1.185320627233986e-05, "loss": 0.5007, "step": 5883 }, { "epoch": 1.489243229562136, "grad_norm": 0.1657610833644867, "learning_rate": 1.18508533693627e-05, "loss": 0.5, "step": 5884 }, { "epoch": 1.489496330043027, "grad_norm": 0.15799306333065033, "learning_rate": 1.1848500360280047e-05, "loss": 0.4751, "step": 5885 }, { "epoch": 1.489749430523918, "grad_norm": 0.2690700888633728, "learning_rate": 1.1846147245226787e-05, "loss": 0.4828, "step": 5886 }, { "epoch": 1.4900025310048088, "grad_norm": 0.16358894109725952, "learning_rate": 1.1843794024337822e-05, "loss": 0.5009, "step": 5887 }, { "epoch": 1.4902556314856998, "grad_norm": 0.15856586396694183, "learning_rate": 1.1841440697748057e-05, "loss": 0.4639, "step": 5888 }, { "epoch": 1.4905087319665906, "grad_norm": 0.16514503955841064, "learning_rate": 1.1839087265592401e-05, "loss": 0.4864, "step": 5889 }, { "epoch": 1.4907618324474816, "grad_norm": 0.16139115393161774, "learning_rate": 1.1836733728005775e-05, "loss": 0.4789, "step": 5890 }, { "epoch": 1.4910149329283726, "grad_norm": 0.16581177711486816, "learning_rate": 1.1834380085123101e-05, "loss": 0.46, "step": 5891 }, { "epoch": 1.4912680334092636, "grad_norm": 0.1626168191432953, "learning_rate": 1.1832026337079305e-05, "loss": 0.5144, "step": 5892 }, { "epoch": 1.4915211338901544, "grad_norm": 0.1710842251777649, "learning_rate": 1.1829672484009329e-05, "loss": 0.466, "step": 5893 }, { "epoch": 1.4917742343710452, "grad_norm": 0.15640468895435333, "learning_rate": 1.1827318526048108e-05, "loss": 0.4534, "step": 5894 }, { "epoch": 1.4920273348519362, "grad_norm": 0.16628283262252808, "learning_rate": 1.1824964463330593e-05, "loss": 0.4937, "step": 5895 }, { "epoch": 1.4922804353328272, "grad_norm": 0.1630641520023346, "learning_rate": 1.1822610295991738e-05, "loss": 0.4905, "step": 5896 }, { "epoch": 1.4925335358137182, "grad_norm": 0.16427187621593475, "learning_rate": 1.18202560241665e-05, "loss": 0.5041, "step": 5897 }, { "epoch": 1.492786636294609, "grad_norm": 0.16634871065616608, "learning_rate": 1.1817901647989847e-05, "loss": 0.4971, "step": 5898 }, { "epoch": 1.4930397367755, "grad_norm": 0.16148781776428223, "learning_rate": 1.1815547167596745e-05, "loss": 0.5004, "step": 5899 }, { "epoch": 1.4932928372563907, "grad_norm": 0.15991519391536713, "learning_rate": 1.1813192583122176e-05, "loss": 0.466, "step": 5900 }, { "epoch": 1.4935459377372817, "grad_norm": 0.20792219042778015, "learning_rate": 1.1810837894701124e-05, "loss": 0.4756, "step": 5901 }, { "epoch": 1.4937990382181727, "grad_norm": 0.16134712100028992, "learning_rate": 1.1808483102468575e-05, "loss": 0.4839, "step": 5902 }, { "epoch": 1.4940521386990635, "grad_norm": 0.16217948496341705, "learning_rate": 1.1806128206559527e-05, "loss": 0.4741, "step": 5903 }, { "epoch": 1.4943052391799545, "grad_norm": 0.16173110902309418, "learning_rate": 1.180377320710898e-05, "loss": 0.467, "step": 5904 }, { "epoch": 1.4945583396608453, "grad_norm": 0.16190439462661743, "learning_rate": 1.1801418104251942e-05, "loss": 0.5031, "step": 5905 }, { "epoch": 1.4948114401417363, "grad_norm": 0.1682378649711609, "learning_rate": 1.1799062898123423e-05, "loss": 0.4847, "step": 5906 }, { "epoch": 1.4950645406226273, "grad_norm": 0.1587132215499878, "learning_rate": 1.1796707588858448e-05, "loss": 0.4914, "step": 5907 }, { "epoch": 1.495317641103518, "grad_norm": 0.17452004551887512, "learning_rate": 1.1794352176592035e-05, "loss": 0.4904, "step": 5908 }, { "epoch": 1.495570741584409, "grad_norm": 0.16511070728302002, "learning_rate": 1.179199666145922e-05, "loss": 0.5048, "step": 5909 }, { "epoch": 1.4958238420652998, "grad_norm": 0.16185569763183594, "learning_rate": 1.1789641043595034e-05, "loss": 0.4639, "step": 5910 }, { "epoch": 1.4960769425461908, "grad_norm": 0.16766321659088135, "learning_rate": 1.1787285323134527e-05, "loss": 0.5014, "step": 5911 }, { "epoch": 1.4963300430270818, "grad_norm": 0.15922997891902924, "learning_rate": 1.1784929500212743e-05, "loss": 0.4667, "step": 5912 }, { "epoch": 1.4965831435079726, "grad_norm": 0.17257952690124512, "learning_rate": 1.1782573574964737e-05, "loss": 0.5022, "step": 5913 }, { "epoch": 1.4968362439888636, "grad_norm": 0.17135365307331085, "learning_rate": 1.1780217547525568e-05, "loss": 0.4874, "step": 5914 }, { "epoch": 1.4970893444697544, "grad_norm": 0.16181957721710205, "learning_rate": 1.1777861418030305e-05, "loss": 0.5027, "step": 5915 }, { "epoch": 1.4973424449506454, "grad_norm": 0.1618236005306244, "learning_rate": 1.1775505186614018e-05, "loss": 0.5168, "step": 5916 }, { "epoch": 1.4975955454315364, "grad_norm": 0.16289527714252472, "learning_rate": 1.1773148853411785e-05, "loss": 0.5027, "step": 5917 }, { "epoch": 1.4978486459124272, "grad_norm": 0.16083231568336487, "learning_rate": 1.177079241855869e-05, "loss": 0.4935, "step": 5918 }, { "epoch": 1.4981017463933182, "grad_norm": 0.16526301205158234, "learning_rate": 1.1768435882189817e-05, "loss": 0.4836, "step": 5919 }, { "epoch": 1.498354846874209, "grad_norm": 0.16205398738384247, "learning_rate": 1.176607924444027e-05, "loss": 0.4685, "step": 5920 }, { "epoch": 1.4986079473551, "grad_norm": 0.18193435668945312, "learning_rate": 1.1763722505445148e-05, "loss": 0.523, "step": 5921 }, { "epoch": 1.498861047835991, "grad_norm": 0.1606685072183609, "learning_rate": 1.1761365665339552e-05, "loss": 0.4785, "step": 5922 }, { "epoch": 1.4991141483168817, "grad_norm": 0.15893691778182983, "learning_rate": 1.1759008724258605e-05, "loss": 0.4874, "step": 5923 }, { "epoch": 1.4993672487977727, "grad_norm": 0.1610843539237976, "learning_rate": 1.1756651682337414e-05, "loss": 0.4992, "step": 5924 }, { "epoch": 1.4996203492786635, "grad_norm": 0.16432151198387146, "learning_rate": 1.1754294539711107e-05, "loss": 0.479, "step": 5925 }, { "epoch": 1.4998734497595545, "grad_norm": 0.2186376303434372, "learning_rate": 1.175193729651482e-05, "loss": 0.4798, "step": 5926 }, { "epoch": 1.5001265502404455, "grad_norm": 0.16622139513492584, "learning_rate": 1.1749579952883681e-05, "loss": 0.4706, "step": 5927 }, { "epoch": 1.5003796507213365, "grad_norm": 0.16819915175437927, "learning_rate": 1.1747222508952837e-05, "loss": 0.4879, "step": 5928 }, { "epoch": 1.5006327512022273, "grad_norm": 0.1630556583404541, "learning_rate": 1.1744864964857429e-05, "loss": 0.4781, "step": 5929 }, { "epoch": 1.500885851683118, "grad_norm": 0.16394157707691193, "learning_rate": 1.1742507320732618e-05, "loss": 0.4751, "step": 5930 }, { "epoch": 1.501138952164009, "grad_norm": 0.15963391959667206, "learning_rate": 1.1740149576713555e-05, "loss": 0.5021, "step": 5931 }, { "epoch": 1.5013920526449, "grad_norm": 0.16086708009243011, "learning_rate": 1.1737791732935409e-05, "loss": 0.5028, "step": 5932 }, { "epoch": 1.501645153125791, "grad_norm": 0.16566701233386993, "learning_rate": 1.173543378953335e-05, "loss": 0.4786, "step": 5933 }, { "epoch": 1.5018982536066818, "grad_norm": 0.17033334076404572, "learning_rate": 1.1733075746642553e-05, "loss": 0.491, "step": 5934 }, { "epoch": 1.5021513540875726, "grad_norm": 0.280536025762558, "learning_rate": 1.1730717604398199e-05, "loss": 0.496, "step": 5935 }, { "epoch": 1.5024044545684636, "grad_norm": 0.1647605448961258, "learning_rate": 1.1728359362935477e-05, "loss": 0.5228, "step": 5936 }, { "epoch": 1.5026575550493546, "grad_norm": 0.164151132106781, "learning_rate": 1.1726001022389582e-05, "loss": 0.4577, "step": 5937 }, { "epoch": 1.5029106555302456, "grad_norm": 0.1639883667230606, "learning_rate": 1.1723642582895703e-05, "loss": 0.4762, "step": 5938 }, { "epoch": 1.5031637560111364, "grad_norm": 0.1668568253517151, "learning_rate": 1.1721284044589055e-05, "loss": 0.5241, "step": 5939 }, { "epoch": 1.5034168564920274, "grad_norm": 0.17034906148910522, "learning_rate": 1.1718925407604843e-05, "loss": 0.4672, "step": 5940 }, { "epoch": 1.5036699569729182, "grad_norm": 0.16274990141391754, "learning_rate": 1.1716566672078283e-05, "loss": 0.4639, "step": 5941 }, { "epoch": 1.5039230574538092, "grad_norm": 0.16464565694332123, "learning_rate": 1.1714207838144598e-05, "loss": 0.48, "step": 5942 }, { "epoch": 1.5041761579347002, "grad_norm": 0.16076906025409698, "learning_rate": 1.1711848905939017e-05, "loss": 0.4667, "step": 5943 }, { "epoch": 1.504429258415591, "grad_norm": 0.17588527500629425, "learning_rate": 1.1709489875596762e-05, "loss": 0.4758, "step": 5944 }, { "epoch": 1.504682358896482, "grad_norm": 0.18343333899974823, "learning_rate": 1.1707130747253083e-05, "loss": 0.482, "step": 5945 }, { "epoch": 1.5049354593773727, "grad_norm": 0.16283582150936127, "learning_rate": 1.1704771521043221e-05, "loss": 0.5095, "step": 5946 }, { "epoch": 1.5051885598582637, "grad_norm": 0.1676919162273407, "learning_rate": 1.1702412197102422e-05, "loss": 0.4804, "step": 5947 }, { "epoch": 1.5054416603391547, "grad_norm": 0.16366863250732422, "learning_rate": 1.1700052775565944e-05, "loss": 0.4855, "step": 5948 }, { "epoch": 1.5056947608200457, "grad_norm": 0.16693025827407837, "learning_rate": 1.1697693256569047e-05, "loss": 0.4993, "step": 5949 }, { "epoch": 1.5059478613009365, "grad_norm": 0.1585821509361267, "learning_rate": 1.1695333640246995e-05, "loss": 0.4743, "step": 5950 }, { "epoch": 1.5062009617818273, "grad_norm": 0.17951253056526184, "learning_rate": 1.1692973926735062e-05, "loss": 0.4701, "step": 5951 }, { "epoch": 1.5064540622627183, "grad_norm": 0.16044266521930695, "learning_rate": 1.1690614116168527e-05, "loss": 0.482, "step": 5952 }, { "epoch": 1.5067071627436093, "grad_norm": 0.168381929397583, "learning_rate": 1.1688254208682667e-05, "loss": 0.4981, "step": 5953 }, { "epoch": 1.5069602632245003, "grad_norm": 0.1618499904870987, "learning_rate": 1.1685894204412777e-05, "loss": 0.4827, "step": 5954 }, { "epoch": 1.507213363705391, "grad_norm": 0.15869082510471344, "learning_rate": 1.168353410349415e-05, "loss": 0.4784, "step": 5955 }, { "epoch": 1.5074664641862818, "grad_norm": 0.16713230311870575, "learning_rate": 1.1681173906062079e-05, "loss": 0.4723, "step": 5956 }, { "epoch": 1.5077195646671728, "grad_norm": 0.16455627977848053, "learning_rate": 1.167881361225188e-05, "loss": 0.4847, "step": 5957 }, { "epoch": 1.5079726651480638, "grad_norm": 0.16428649425506592, "learning_rate": 1.1676453222198854e-05, "loss": 0.4998, "step": 5958 }, { "epoch": 1.5082257656289548, "grad_norm": 0.15708988904953003, "learning_rate": 1.1674092736038325e-05, "loss": 0.4818, "step": 5959 }, { "epoch": 1.5084788661098456, "grad_norm": 0.15574802458286285, "learning_rate": 1.1671732153905603e-05, "loss": 0.4765, "step": 5960 }, { "epoch": 1.5087319665907364, "grad_norm": 0.15880124270915985, "learning_rate": 1.1669371475936029e-05, "loss": 0.4944, "step": 5961 }, { "epoch": 1.5089850670716274, "grad_norm": 0.16741500794887543, "learning_rate": 1.166701070226493e-05, "loss": 0.4961, "step": 5962 }, { "epoch": 1.5092381675525184, "grad_norm": 0.157860666513443, "learning_rate": 1.166464983302764e-05, "loss": 0.4651, "step": 5963 }, { "epoch": 1.5094912680334094, "grad_norm": 0.16472415626049042, "learning_rate": 1.1662288868359506e-05, "loss": 0.521, "step": 5964 }, { "epoch": 1.5097443685143002, "grad_norm": 0.1603134721517563, "learning_rate": 1.165992780839588e-05, "loss": 0.4947, "step": 5965 }, { "epoch": 1.509997468995191, "grad_norm": 0.16104243695735931, "learning_rate": 1.1657566653272113e-05, "loss": 0.4998, "step": 5966 }, { "epoch": 1.510250569476082, "grad_norm": 0.16636405885219574, "learning_rate": 1.1655205403123566e-05, "loss": 0.4753, "step": 5967 }, { "epoch": 1.510503669956973, "grad_norm": 0.16175980865955353, "learning_rate": 1.1652844058085605e-05, "loss": 0.4784, "step": 5968 }, { "epoch": 1.510756770437864, "grad_norm": 0.16074435412883759, "learning_rate": 1.1650482618293598e-05, "loss": 0.4926, "step": 5969 }, { "epoch": 1.5110098709187547, "grad_norm": 0.17279845476150513, "learning_rate": 1.1648121083882927e-05, "loss": 0.4869, "step": 5970 }, { "epoch": 1.5112629713996455, "grad_norm": 0.1586635261774063, "learning_rate": 1.1645759454988968e-05, "loss": 0.5042, "step": 5971 }, { "epoch": 1.5115160718805365, "grad_norm": 0.15919551253318787, "learning_rate": 1.164339773174711e-05, "loss": 0.4843, "step": 5972 }, { "epoch": 1.5117691723614275, "grad_norm": 0.16355308890342712, "learning_rate": 1.1641035914292747e-05, "loss": 0.5015, "step": 5973 }, { "epoch": 1.5120222728423185, "grad_norm": 0.16178403794765472, "learning_rate": 1.1638674002761275e-05, "loss": 0.4748, "step": 5974 }, { "epoch": 1.5122753733232093, "grad_norm": 0.1626816838979721, "learning_rate": 1.1636311997288099e-05, "loss": 0.4783, "step": 5975 }, { "epoch": 1.5125284738041003, "grad_norm": 0.16628943383693695, "learning_rate": 1.1633949898008628e-05, "loss": 0.4902, "step": 5976 }, { "epoch": 1.512781574284991, "grad_norm": 0.15732546150684357, "learning_rate": 1.1631587705058274e-05, "loss": 0.4726, "step": 5977 }, { "epoch": 1.513034674765882, "grad_norm": 0.1644376814365387, "learning_rate": 1.1629225418572458e-05, "loss": 0.5392, "step": 5978 }, { "epoch": 1.513287775246773, "grad_norm": 0.1609513759613037, "learning_rate": 1.1626863038686606e-05, "loss": 0.4943, "step": 5979 }, { "epoch": 1.513540875727664, "grad_norm": 0.1806795299053192, "learning_rate": 1.1624500565536145e-05, "loss": 0.4767, "step": 5980 }, { "epoch": 1.5137939762085548, "grad_norm": 0.16703838109970093, "learning_rate": 1.1622137999256516e-05, "loss": 0.4943, "step": 5981 }, { "epoch": 1.5140470766894456, "grad_norm": 0.16569307446479797, "learning_rate": 1.1619775339983153e-05, "loss": 0.4778, "step": 5982 }, { "epoch": 1.5143001771703366, "grad_norm": 0.16989925503730774, "learning_rate": 1.1617412587851507e-05, "loss": 0.4933, "step": 5983 }, { "epoch": 1.5145532776512276, "grad_norm": 0.1684577763080597, "learning_rate": 1.161504974299703e-05, "loss": 0.4947, "step": 5984 }, { "epoch": 1.5148063781321186, "grad_norm": 0.16557830572128296, "learning_rate": 1.1612686805555173e-05, "loss": 0.4969, "step": 5985 }, { "epoch": 1.5150594786130094, "grad_norm": 0.16279537975788116, "learning_rate": 1.1610323775661405e-05, "loss": 0.4882, "step": 5986 }, { "epoch": 1.5153125790939002, "grad_norm": 0.16819128394126892, "learning_rate": 1.1607960653451194e-05, "loss": 0.5092, "step": 5987 }, { "epoch": 1.5155656795747912, "grad_norm": 0.16578629612922668, "learning_rate": 1.1605597439060004e-05, "loss": 0.5056, "step": 5988 }, { "epoch": 1.5158187800556822, "grad_norm": 0.16279838979244232, "learning_rate": 1.160323413262332e-05, "loss": 0.494, "step": 5989 }, { "epoch": 1.5160718805365732, "grad_norm": 0.1613675057888031, "learning_rate": 1.1600870734276625e-05, "loss": 0.4915, "step": 5990 }, { "epoch": 1.516324981017464, "grad_norm": 0.1662093997001648, "learning_rate": 1.1598507244155406e-05, "loss": 0.4955, "step": 5991 }, { "epoch": 1.5165780814983547, "grad_norm": 0.16456490755081177, "learning_rate": 1.1596143662395159e-05, "loss": 0.4804, "step": 5992 }, { "epoch": 1.5168311819792457, "grad_norm": 0.17414537072181702, "learning_rate": 1.1593779989131382e-05, "loss": 0.4847, "step": 5993 }, { "epoch": 1.5170842824601367, "grad_norm": 0.15841169655323029, "learning_rate": 1.1591416224499578e-05, "loss": 0.4851, "step": 5994 }, { "epoch": 1.5173373829410277, "grad_norm": 0.16263100504875183, "learning_rate": 1.1589052368635257e-05, "loss": 0.4754, "step": 5995 }, { "epoch": 1.5175904834219185, "grad_norm": 0.1577397882938385, "learning_rate": 1.1586688421673936e-05, "loss": 0.4937, "step": 5996 }, { "epoch": 1.5178435839028093, "grad_norm": 0.1639324575662613, "learning_rate": 1.158432438375113e-05, "loss": 0.5232, "step": 5997 }, { "epoch": 1.5180966843837003, "grad_norm": 0.17356815934181213, "learning_rate": 1.1581960255002371e-05, "loss": 0.4791, "step": 5998 }, { "epoch": 1.5183497848645913, "grad_norm": 0.16451609134674072, "learning_rate": 1.1579596035563186e-05, "loss": 0.4888, "step": 5999 }, { "epoch": 1.5186028853454823, "grad_norm": 0.16927769780158997, "learning_rate": 1.1577231725569108e-05, "loss": 0.5061, "step": 6000 }, { "epoch": 1.518855985826373, "grad_norm": 0.15791696310043335, "learning_rate": 1.1574867325155685e-05, "loss": 0.4792, "step": 6001 }, { "epoch": 1.5191090863072638, "grad_norm": 0.16223454475402832, "learning_rate": 1.1572502834458457e-05, "loss": 0.4805, "step": 6002 }, { "epoch": 1.5193621867881548, "grad_norm": 0.1632116585969925, "learning_rate": 1.1570138253612978e-05, "loss": 0.4773, "step": 6003 }, { "epoch": 1.5196152872690458, "grad_norm": 0.16461560130119324, "learning_rate": 1.1567773582754803e-05, "loss": 0.5195, "step": 6004 }, { "epoch": 1.5198683877499368, "grad_norm": 0.16585107147693634, "learning_rate": 1.1565408822019495e-05, "loss": 0.5063, "step": 6005 }, { "epoch": 1.5201214882308276, "grad_norm": 0.16680742800235748, "learning_rate": 1.1563043971542618e-05, "loss": 0.5029, "step": 6006 }, { "epoch": 1.5203745887117186, "grad_norm": 0.16634473204612732, "learning_rate": 1.1560679031459747e-05, "loss": 0.4696, "step": 6007 }, { "epoch": 1.5206276891926094, "grad_norm": 0.16153362393379211, "learning_rate": 1.1558314001906457e-05, "loss": 0.5054, "step": 6008 }, { "epoch": 1.5208807896735004, "grad_norm": 0.16502128541469574, "learning_rate": 1.1555948883018332e-05, "loss": 0.4701, "step": 6009 }, { "epoch": 1.5211338901543914, "grad_norm": 0.16946497559547424, "learning_rate": 1.1553583674930957e-05, "loss": 0.4937, "step": 6010 }, { "epoch": 1.5213869906352822, "grad_norm": 0.16151955723762512, "learning_rate": 1.1551218377779925e-05, "loss": 0.4764, "step": 6011 }, { "epoch": 1.5216400911161732, "grad_norm": 0.17718568444252014, "learning_rate": 1.1548852991700836e-05, "loss": 0.4849, "step": 6012 }, { "epoch": 1.521893191597064, "grad_norm": 0.15990592539310455, "learning_rate": 1.154648751682929e-05, "loss": 0.4678, "step": 6013 }, { "epoch": 1.522146292077955, "grad_norm": 0.15857413411140442, "learning_rate": 1.154412195330089e-05, "loss": 0.4861, "step": 6014 }, { "epoch": 1.522399392558846, "grad_norm": 0.158157616853714, "learning_rate": 1.154175630125126e-05, "loss": 0.455, "step": 6015 }, { "epoch": 1.522652493039737, "grad_norm": 0.1602904349565506, "learning_rate": 1.1539390560816011e-05, "loss": 0.4981, "step": 6016 }, { "epoch": 1.5229055935206277, "grad_norm": 0.16789625585079193, "learning_rate": 1.1537024732130763e-05, "loss": 0.4864, "step": 6017 }, { "epoch": 1.5231586940015185, "grad_norm": 0.16760557889938354, "learning_rate": 1.1534658815331152e-05, "loss": 0.5083, "step": 6018 }, { "epoch": 1.5234117944824095, "grad_norm": 0.16191501915454865, "learning_rate": 1.1532292810552805e-05, "loss": 0.4928, "step": 6019 }, { "epoch": 1.5236648949633005, "grad_norm": 0.1556071937084198, "learning_rate": 1.152992671793136e-05, "loss": 0.4663, "step": 6020 }, { "epoch": 1.5239179954441915, "grad_norm": 0.17192058265209198, "learning_rate": 1.1527560537602462e-05, "loss": 0.4785, "step": 6021 }, { "epoch": 1.5241710959250823, "grad_norm": 0.16249534487724304, "learning_rate": 1.152519426970176e-05, "loss": 0.4957, "step": 6022 }, { "epoch": 1.524424196405973, "grad_norm": 0.16552205383777618, "learning_rate": 1.1522827914364907e-05, "loss": 0.5069, "step": 6023 }, { "epoch": 1.524677296886864, "grad_norm": 0.17240549623966217, "learning_rate": 1.1520461471727557e-05, "loss": 0.4977, "step": 6024 }, { "epoch": 1.524930397367755, "grad_norm": 0.16157296299934387, "learning_rate": 1.1518094941925377e-05, "loss": 0.5026, "step": 6025 }, { "epoch": 1.525183497848646, "grad_norm": 0.16400471329689026, "learning_rate": 1.1515728325094036e-05, "loss": 0.4655, "step": 6026 }, { "epoch": 1.5254365983295368, "grad_norm": 0.16166937351226807, "learning_rate": 1.1513361621369202e-05, "loss": 0.4824, "step": 6027 }, { "epoch": 1.5256896988104276, "grad_norm": 0.1604999601840973, "learning_rate": 1.1510994830886557e-05, "loss": 0.4968, "step": 6028 }, { "epoch": 1.5259427992913186, "grad_norm": 0.15818773210048676, "learning_rate": 1.1508627953781786e-05, "loss": 0.479, "step": 6029 }, { "epoch": 1.5261958997722096, "grad_norm": 0.17188431322574615, "learning_rate": 1.1506260990190574e-05, "loss": 0.4687, "step": 6030 }, { "epoch": 1.5264490002531006, "grad_norm": 0.15775451064109802, "learning_rate": 1.1503893940248614e-05, "loss": 0.4847, "step": 6031 }, { "epoch": 1.5267021007339914, "grad_norm": 0.16591380536556244, "learning_rate": 1.1501526804091603e-05, "loss": 0.4851, "step": 6032 }, { "epoch": 1.5269552012148822, "grad_norm": 0.18800006806850433, "learning_rate": 1.1499159581855246e-05, "loss": 0.4763, "step": 6033 }, { "epoch": 1.5272083016957732, "grad_norm": 0.16187185049057007, "learning_rate": 1.1496792273675254e-05, "loss": 0.4912, "step": 6034 }, { "epoch": 1.5274614021766642, "grad_norm": 0.16598467528820038, "learning_rate": 1.1494424879687333e-05, "loss": 0.4989, "step": 6035 }, { "epoch": 1.5277145026575552, "grad_norm": 0.1616646647453308, "learning_rate": 1.1492057400027202e-05, "loss": 0.4799, "step": 6036 }, { "epoch": 1.527967603138446, "grad_norm": 0.16038240492343903, "learning_rate": 1.1489689834830588e-05, "loss": 0.4765, "step": 6037 }, { "epoch": 1.528220703619337, "grad_norm": 0.16605757176876068, "learning_rate": 1.1487322184233216e-05, "loss": 0.4976, "step": 6038 }, { "epoch": 1.5284738041002277, "grad_norm": 0.16207362711429596, "learning_rate": 1.1484954448370816e-05, "loss": 0.4906, "step": 6039 }, { "epoch": 1.5287269045811187, "grad_norm": 0.15536367893218994, "learning_rate": 1.148258662737913e-05, "loss": 0.5071, "step": 6040 }, { "epoch": 1.5289800050620097, "grad_norm": 0.19866088032722473, "learning_rate": 1.1480218721393893e-05, "loss": 0.487, "step": 6041 }, { "epoch": 1.5292331055429005, "grad_norm": 0.16344158351421356, "learning_rate": 1.1477850730550859e-05, "loss": 0.4982, "step": 6042 }, { "epoch": 1.5294862060237915, "grad_norm": 0.1697426587343216, "learning_rate": 1.1475482654985782e-05, "loss": 0.5015, "step": 6043 }, { "epoch": 1.5297393065046823, "grad_norm": 0.16369575262069702, "learning_rate": 1.147311449483441e-05, "loss": 0.4852, "step": 6044 }, { "epoch": 1.5299924069855733, "grad_norm": 0.15896165370941162, "learning_rate": 1.1470746250232507e-05, "loss": 0.4777, "step": 6045 }, { "epoch": 1.5302455074664643, "grad_norm": 0.16582834720611572, "learning_rate": 1.1468377921315845e-05, "loss": 0.4716, "step": 6046 }, { "epoch": 1.5304986079473553, "grad_norm": 0.16499210894107819, "learning_rate": 1.1466009508220188e-05, "loss": 0.4812, "step": 6047 }, { "epoch": 1.530751708428246, "grad_norm": 0.16045254468917847, "learning_rate": 1.1463641011081319e-05, "loss": 0.4751, "step": 6048 }, { "epoch": 1.5310048089091368, "grad_norm": 0.1642656922340393, "learning_rate": 1.1461272430035014e-05, "loss": 0.4953, "step": 6049 }, { "epoch": 1.5312579093900278, "grad_norm": 0.16166755557060242, "learning_rate": 1.145890376521706e-05, "loss": 0.5212, "step": 6050 }, { "epoch": 1.5315110098709188, "grad_norm": 0.1629316359758377, "learning_rate": 1.1456535016763248e-05, "loss": 0.4896, "step": 6051 }, { "epoch": 1.5317641103518098, "grad_norm": 0.1639319658279419, "learning_rate": 1.1454166184809374e-05, "loss": 0.4879, "step": 6052 }, { "epoch": 1.5320172108327006, "grad_norm": 0.16108523309230804, "learning_rate": 1.1451797269491236e-05, "loss": 0.5053, "step": 6053 }, { "epoch": 1.5322703113135914, "grad_norm": 0.15992994606494904, "learning_rate": 1.1449428270944643e-05, "loss": 0.4653, "step": 6054 }, { "epoch": 1.5325234117944824, "grad_norm": 0.16208277642726898, "learning_rate": 1.1447059189305398e-05, "loss": 0.4858, "step": 6055 }, { "epoch": 1.5327765122753734, "grad_norm": 0.1578952521085739, "learning_rate": 1.1444690024709323e-05, "loss": 0.4776, "step": 6056 }, { "epoch": 1.5330296127562644, "grad_norm": 0.16124160587787628, "learning_rate": 1.1442320777292232e-05, "loss": 0.4934, "step": 6057 }, { "epoch": 1.5332827132371551, "grad_norm": 0.16677691042423248, "learning_rate": 1.1439951447189948e-05, "loss": 0.481, "step": 6058 }, { "epoch": 1.533535813718046, "grad_norm": 0.1651972532272339, "learning_rate": 1.1437582034538307e-05, "loss": 0.4737, "step": 6059 }, { "epoch": 1.533788914198937, "grad_norm": 0.1658448576927185, "learning_rate": 1.1435212539473132e-05, "loss": 0.5057, "step": 6060 }, { "epoch": 1.534042014679828, "grad_norm": 0.15817581117153168, "learning_rate": 1.1432842962130266e-05, "loss": 0.4862, "step": 6061 }, { "epoch": 1.534295115160719, "grad_norm": 0.1616123467683792, "learning_rate": 1.1430473302645558e-05, "loss": 0.4858, "step": 6062 }, { "epoch": 1.5345482156416097, "grad_norm": 0.1766502857208252, "learning_rate": 1.1428103561154847e-05, "loss": 0.4785, "step": 6063 }, { "epoch": 1.5348013161225005, "grad_norm": 0.16253872215747833, "learning_rate": 1.1425733737793986e-05, "loss": 0.4982, "step": 6064 }, { "epoch": 1.5350544166033915, "grad_norm": 0.1648051142692566, "learning_rate": 1.142336383269884e-05, "loss": 0.4855, "step": 6065 }, { "epoch": 1.5353075170842825, "grad_norm": 0.1603965163230896, "learning_rate": 1.142099384600526e-05, "loss": 0.4713, "step": 6066 }, { "epoch": 1.5355606175651735, "grad_norm": 0.1654924899339676, "learning_rate": 1.1418623777849118e-05, "loss": 0.4723, "step": 6067 }, { "epoch": 1.5358137180460643, "grad_norm": 0.16030721366405487, "learning_rate": 1.1416253628366285e-05, "loss": 0.4807, "step": 6068 }, { "epoch": 1.536066818526955, "grad_norm": 0.16769303381443024, "learning_rate": 1.1413883397692636e-05, "loss": 0.4726, "step": 6069 }, { "epoch": 1.536319919007846, "grad_norm": 0.16449008882045746, "learning_rate": 1.1411513085964047e-05, "loss": 0.4822, "step": 6070 }, { "epoch": 1.536573019488737, "grad_norm": 0.16596725583076477, "learning_rate": 1.140914269331641e-05, "loss": 0.507, "step": 6071 }, { "epoch": 1.536826119969628, "grad_norm": 0.16278956830501556, "learning_rate": 1.1406772219885614e-05, "loss": 0.4727, "step": 6072 }, { "epoch": 1.5370792204505188, "grad_norm": 0.16582341492176056, "learning_rate": 1.1404401665807548e-05, "loss": 0.4681, "step": 6073 }, { "epoch": 1.5373323209314098, "grad_norm": 0.16596490144729614, "learning_rate": 1.1402031031218113e-05, "loss": 0.4774, "step": 6074 }, { "epoch": 1.5375854214123006, "grad_norm": 0.1605575978755951, "learning_rate": 1.1399660316253214e-05, "loss": 0.4735, "step": 6075 }, { "epoch": 1.5378385218931916, "grad_norm": 0.1650431603193283, "learning_rate": 1.1397289521048758e-05, "loss": 0.473, "step": 6076 }, { "epoch": 1.5380916223740826, "grad_norm": 0.1645522266626358, "learning_rate": 1.1394918645740657e-05, "loss": 0.4736, "step": 6077 }, { "epoch": 1.5383447228549736, "grad_norm": 0.16255275905132294, "learning_rate": 1.139254769046483e-05, "loss": 0.4682, "step": 6078 }, { "epoch": 1.5385978233358644, "grad_norm": 0.15836960077285767, "learning_rate": 1.1390176655357198e-05, "loss": 0.4732, "step": 6079 }, { "epoch": 1.5388509238167551, "grad_norm": 0.17045430839061737, "learning_rate": 1.1387805540553687e-05, "loss": 0.4764, "step": 6080 }, { "epoch": 1.5391040242976461, "grad_norm": 0.1656685769557953, "learning_rate": 1.138543434619023e-05, "loss": 0.4661, "step": 6081 }, { "epoch": 1.5393571247785371, "grad_norm": 0.16449882090091705, "learning_rate": 1.1383063072402758e-05, "loss": 0.4982, "step": 6082 }, { "epoch": 1.5396102252594281, "grad_norm": 0.16851675510406494, "learning_rate": 1.1380691719327213e-05, "loss": 0.5016, "step": 6083 }, { "epoch": 1.539863325740319, "grad_norm": 0.16562007367610931, "learning_rate": 1.1378320287099545e-05, "loss": 0.5005, "step": 6084 }, { "epoch": 1.5401164262212097, "grad_norm": 0.15573669970035553, "learning_rate": 1.1375948775855695e-05, "loss": 0.4752, "step": 6085 }, { "epoch": 1.5403695267021007, "grad_norm": 0.1618313193321228, "learning_rate": 1.137357718573162e-05, "loss": 0.493, "step": 6086 }, { "epoch": 1.5406226271829917, "grad_norm": 0.16470074653625488, "learning_rate": 1.137120551686328e-05, "loss": 0.4998, "step": 6087 }, { "epoch": 1.5408757276638827, "grad_norm": 0.16483528912067413, "learning_rate": 1.1368833769386635e-05, "loss": 0.4902, "step": 6088 }, { "epoch": 1.5411288281447735, "grad_norm": 0.16344454884529114, "learning_rate": 1.1366461943437657e-05, "loss": 0.4783, "step": 6089 }, { "epoch": 1.5413819286256643, "grad_norm": 0.1640762984752655, "learning_rate": 1.1364090039152312e-05, "loss": 0.4964, "step": 6090 }, { "epoch": 1.5416350291065553, "grad_norm": 0.15683868527412415, "learning_rate": 1.136171805666658e-05, "loss": 0.5026, "step": 6091 }, { "epoch": 1.5418881295874463, "grad_norm": 0.1676829606294632, "learning_rate": 1.1359345996116438e-05, "loss": 0.4889, "step": 6092 }, { "epoch": 1.5421412300683373, "grad_norm": 0.16166944801807404, "learning_rate": 1.1356973857637874e-05, "loss": 0.4635, "step": 6093 }, { "epoch": 1.542394330549228, "grad_norm": 0.1695765107870102, "learning_rate": 1.1354601641366879e-05, "loss": 0.5065, "step": 6094 }, { "epoch": 1.5426474310301188, "grad_norm": 0.16345259547233582, "learning_rate": 1.1352229347439443e-05, "loss": 0.4922, "step": 6095 }, { "epoch": 1.5429005315110098, "grad_norm": 0.16592632234096527, "learning_rate": 1.134985697599157e-05, "loss": 0.495, "step": 6096 }, { "epoch": 1.5431536319919008, "grad_norm": 0.1587771624326706, "learning_rate": 1.1347484527159258e-05, "loss": 0.4884, "step": 6097 }, { "epoch": 1.5434067324727918, "grad_norm": 0.1618434339761734, "learning_rate": 1.1345112001078517e-05, "loss": 0.489, "step": 6098 }, { "epoch": 1.5436598329536826, "grad_norm": 0.16356176137924194, "learning_rate": 1.1342739397885356e-05, "loss": 0.5074, "step": 6099 }, { "epoch": 1.5439129334345734, "grad_norm": 0.16349127888679504, "learning_rate": 1.1340366717715796e-05, "loss": 0.4654, "step": 6100 }, { "epoch": 1.5441660339154644, "grad_norm": 0.16145262122154236, "learning_rate": 1.1337993960705858e-05, "loss": 0.4869, "step": 6101 }, { "epoch": 1.5444191343963554, "grad_norm": 0.15885640680789948, "learning_rate": 1.1335621126991558e-05, "loss": 0.4922, "step": 6102 }, { "epoch": 1.5446722348772464, "grad_norm": 0.19264182448387146, "learning_rate": 1.1333248216708936e-05, "loss": 0.4921, "step": 6103 }, { "epoch": 1.5449253353581371, "grad_norm": 0.162505105137825, "learning_rate": 1.1330875229994023e-05, "loss": 0.4714, "step": 6104 }, { "epoch": 1.5451784358390281, "grad_norm": 0.16483478248119354, "learning_rate": 1.1328502166982851e-05, "loss": 0.4937, "step": 6105 }, { "epoch": 1.545431536319919, "grad_norm": 0.16139884293079376, "learning_rate": 1.1326129027811474e-05, "loss": 0.492, "step": 6106 }, { "epoch": 1.54568463680081, "grad_norm": 0.15789364278316498, "learning_rate": 1.132375581261593e-05, "loss": 0.4702, "step": 6107 }, { "epoch": 1.545937737281701, "grad_norm": 0.16137827932834625, "learning_rate": 1.1321382521532272e-05, "loss": 0.4669, "step": 6108 }, { "epoch": 1.5461908377625917, "grad_norm": 0.16375203430652618, "learning_rate": 1.1319009154696559e-05, "loss": 0.4629, "step": 6109 }, { "epoch": 1.5464439382434827, "grad_norm": 0.16544851660728455, "learning_rate": 1.1316635712244849e-05, "loss": 0.4855, "step": 6110 }, { "epoch": 1.5466970387243735, "grad_norm": 0.21630766987800598, "learning_rate": 1.1314262194313206e-05, "loss": 0.47, "step": 6111 }, { "epoch": 1.5469501392052645, "grad_norm": 0.1653144806623459, "learning_rate": 1.13118886010377e-05, "loss": 0.5131, "step": 6112 }, { "epoch": 1.5472032396861555, "grad_norm": 0.1662723869085312, "learning_rate": 1.1309514932554402e-05, "loss": 0.4713, "step": 6113 }, { "epoch": 1.5474563401670465, "grad_norm": 0.16142095625400543, "learning_rate": 1.1307141188999391e-05, "loss": 0.471, "step": 6114 }, { "epoch": 1.5477094406479373, "grad_norm": 0.1630452573299408, "learning_rate": 1.1304767370508749e-05, "loss": 0.4833, "step": 6115 }, { "epoch": 1.547962541128828, "grad_norm": 0.16036514937877655, "learning_rate": 1.130239347721856e-05, "loss": 0.46, "step": 6116 }, { "epoch": 1.548215641609719, "grad_norm": 0.16283151507377625, "learning_rate": 1.1300019509264916e-05, "loss": 0.4916, "step": 6117 }, { "epoch": 1.54846874209061, "grad_norm": 0.1634528934955597, "learning_rate": 1.1297645466783912e-05, "loss": 0.492, "step": 6118 }, { "epoch": 1.548721842571501, "grad_norm": 0.172268345952034, "learning_rate": 1.1295271349911646e-05, "loss": 0.4904, "step": 6119 }, { "epoch": 1.5489749430523918, "grad_norm": 0.16649766266345978, "learning_rate": 1.1292897158784224e-05, "loss": 0.4894, "step": 6120 }, { "epoch": 1.5492280435332826, "grad_norm": 0.1637597233057022, "learning_rate": 1.1290522893537746e-05, "loss": 0.5061, "step": 6121 }, { "epoch": 1.5494811440141736, "grad_norm": 0.1638418734073639, "learning_rate": 1.1288148554308328e-05, "loss": 0.4865, "step": 6122 }, { "epoch": 1.5497342444950646, "grad_norm": 0.16839566826820374, "learning_rate": 1.1285774141232088e-05, "loss": 0.5026, "step": 6123 }, { "epoch": 1.5499873449759556, "grad_norm": 0.16685627400875092, "learning_rate": 1.1283399654445141e-05, "loss": 0.4786, "step": 6124 }, { "epoch": 1.5502404454568464, "grad_norm": 0.15871673822402954, "learning_rate": 1.1281025094083618e-05, "loss": 0.4723, "step": 6125 }, { "epoch": 1.5504935459377371, "grad_norm": 0.16159109771251678, "learning_rate": 1.1278650460283644e-05, "loss": 0.5031, "step": 6126 }, { "epoch": 1.5507466464186281, "grad_norm": 0.16552217304706573, "learning_rate": 1.1276275753181347e-05, "loss": 0.5158, "step": 6127 }, { "epoch": 1.5509997468995191, "grad_norm": 0.16507560014724731, "learning_rate": 1.1273900972912872e-05, "loss": 0.4959, "step": 6128 }, { "epoch": 1.5512528473804101, "grad_norm": 0.17548146843910217, "learning_rate": 1.1271526119614356e-05, "loss": 0.4805, "step": 6129 }, { "epoch": 1.551505947861301, "grad_norm": 0.16732732951641083, "learning_rate": 1.1269151193421943e-05, "loss": 0.5369, "step": 6130 }, { "epoch": 1.5517590483421917, "grad_norm": 0.16840054094791412, "learning_rate": 1.1266776194471786e-05, "loss": 0.5099, "step": 6131 }, { "epoch": 1.5520121488230827, "grad_norm": 0.15928596258163452, "learning_rate": 1.1264401122900037e-05, "loss": 0.4915, "step": 6132 }, { "epoch": 1.5522652493039737, "grad_norm": 0.16414225101470947, "learning_rate": 1.1262025978842854e-05, "loss": 0.4978, "step": 6133 }, { "epoch": 1.5525183497848647, "grad_norm": 0.16259132325649261, "learning_rate": 1.1259650762436399e-05, "loss": 0.4962, "step": 6134 }, { "epoch": 1.5527714502657555, "grad_norm": 0.16376087069511414, "learning_rate": 1.1257275473816835e-05, "loss": 0.4709, "step": 6135 }, { "epoch": 1.5530245507466465, "grad_norm": 0.16009953618049622, "learning_rate": 1.1254900113120337e-05, "loss": 0.4587, "step": 6136 }, { "epoch": 1.5532776512275372, "grad_norm": 0.16092225909233093, "learning_rate": 1.1252524680483076e-05, "loss": 0.4916, "step": 6137 }, { "epoch": 1.5535307517084282, "grad_norm": 0.16344976425170898, "learning_rate": 1.1250149176041234e-05, "loss": 0.4797, "step": 6138 }, { "epoch": 1.5537838521893192, "grad_norm": 0.1685333251953125, "learning_rate": 1.124777359993099e-05, "loss": 0.4906, "step": 6139 }, { "epoch": 1.55403695267021, "grad_norm": 0.1633366048336029, "learning_rate": 1.1245397952288533e-05, "loss": 0.4903, "step": 6140 }, { "epoch": 1.554290053151101, "grad_norm": 0.1610420197248459, "learning_rate": 1.1243022233250054e-05, "loss": 0.4569, "step": 6141 }, { "epoch": 1.5545431536319918, "grad_norm": 0.16304565966129303, "learning_rate": 1.1240646442951748e-05, "loss": 0.4983, "step": 6142 }, { "epoch": 1.5547962541128828, "grad_norm": 0.15688267350196838, "learning_rate": 1.123827058152981e-05, "loss": 0.4525, "step": 6143 }, { "epoch": 1.5550493545937738, "grad_norm": 0.16370242834091187, "learning_rate": 1.1235894649120447e-05, "loss": 0.5122, "step": 6144 }, { "epoch": 1.5553024550746648, "grad_norm": 0.16898714005947113, "learning_rate": 1.1233518645859869e-05, "loss": 0.4854, "step": 6145 }, { "epoch": 1.5555555555555556, "grad_norm": 0.16287142038345337, "learning_rate": 1.1231142571884277e-05, "loss": 0.4983, "step": 6146 }, { "epoch": 1.5558086560364464, "grad_norm": 0.16393983364105225, "learning_rate": 1.1228766427329898e-05, "loss": 0.4849, "step": 6147 }, { "epoch": 1.5560617565173374, "grad_norm": 0.16282449662685394, "learning_rate": 1.1226390212332948e-05, "loss": 0.5046, "step": 6148 }, { "epoch": 1.5563148569982284, "grad_norm": 0.16159020364284515, "learning_rate": 1.1224013927029643e-05, "loss": 0.4864, "step": 6149 }, { "epoch": 1.5565679574791194, "grad_norm": 0.168535053730011, "learning_rate": 1.1221637571556217e-05, "loss": 0.5029, "step": 6150 }, { "epoch": 1.5568210579600101, "grad_norm": 0.16166996955871582, "learning_rate": 1.1219261146048905e-05, "loss": 0.4724, "step": 6151 }, { "epoch": 1.557074158440901, "grad_norm": 0.16324053704738617, "learning_rate": 1.121688465064393e-05, "loss": 0.492, "step": 6152 }, { "epoch": 1.557327258921792, "grad_norm": 0.16318577527999878, "learning_rate": 1.1214508085477548e-05, "loss": 0.4881, "step": 6153 }, { "epoch": 1.557580359402683, "grad_norm": 0.15705284476280212, "learning_rate": 1.1212131450685991e-05, "loss": 0.4444, "step": 6154 }, { "epoch": 1.557833459883574, "grad_norm": 0.1620665192604065, "learning_rate": 1.1209754746405506e-05, "loss": 0.4751, "step": 6155 }, { "epoch": 1.5580865603644647, "grad_norm": 0.344419002532959, "learning_rate": 1.1207377972772352e-05, "loss": 0.4962, "step": 6156 }, { "epoch": 1.5583396608453555, "grad_norm": 0.1598355919122696, "learning_rate": 1.1205001129922777e-05, "loss": 0.492, "step": 6157 }, { "epoch": 1.5585927613262465, "grad_norm": 0.16420428454875946, "learning_rate": 1.1202624217993044e-05, "loss": 0.4753, "step": 6158 }, { "epoch": 1.5588458618071375, "grad_norm": 0.16585776209831238, "learning_rate": 1.1200247237119415e-05, "loss": 0.5157, "step": 6159 }, { "epoch": 1.5590989622880285, "grad_norm": 0.1664457768201828, "learning_rate": 1.1197870187438159e-05, "loss": 0.4612, "step": 6160 }, { "epoch": 1.5593520627689192, "grad_norm": 0.1641852855682373, "learning_rate": 1.1195493069085543e-05, "loss": 0.4719, "step": 6161 }, { "epoch": 1.55960516324981, "grad_norm": 0.16246657073497772, "learning_rate": 1.119311588219785e-05, "loss": 0.4708, "step": 6162 }, { "epoch": 1.559858263730701, "grad_norm": 0.1627984344959259, "learning_rate": 1.1190738626911351e-05, "loss": 0.4751, "step": 6163 }, { "epoch": 1.560111364211592, "grad_norm": 0.1726343333721161, "learning_rate": 1.1188361303362334e-05, "loss": 0.4791, "step": 6164 }, { "epoch": 1.560364464692483, "grad_norm": 0.16769856214523315, "learning_rate": 1.1185983911687082e-05, "loss": 0.4332, "step": 6165 }, { "epoch": 1.5606175651733738, "grad_norm": 0.1580900251865387, "learning_rate": 1.118360645202189e-05, "loss": 0.4644, "step": 6166 }, { "epoch": 1.5608706656542646, "grad_norm": 0.16279515624046326, "learning_rate": 1.1181228924503054e-05, "loss": 0.4977, "step": 6167 }, { "epoch": 1.5611237661351556, "grad_norm": 0.1646638959646225, "learning_rate": 1.1178851329266862e-05, "loss": 0.49, "step": 6168 }, { "epoch": 1.5613768666160466, "grad_norm": 0.17124244570732117, "learning_rate": 1.117647366644963e-05, "loss": 0.4859, "step": 6169 }, { "epoch": 1.5616299670969376, "grad_norm": 0.16552038490772247, "learning_rate": 1.1174095936187656e-05, "loss": 0.5051, "step": 6170 }, { "epoch": 1.5618830675778284, "grad_norm": 0.16351264715194702, "learning_rate": 1.1171718138617251e-05, "loss": 0.4501, "step": 6171 }, { "epoch": 1.5621361680587194, "grad_norm": 0.16256840527057648, "learning_rate": 1.1169340273874733e-05, "loss": 0.4837, "step": 6172 }, { "epoch": 1.5623892685396101, "grad_norm": 0.16159574687480927, "learning_rate": 1.1166962342096419e-05, "loss": 0.5024, "step": 6173 }, { "epoch": 1.5626423690205011, "grad_norm": 0.17786665260791779, "learning_rate": 1.1164584343418627e-05, "loss": 0.5002, "step": 6174 }, { "epoch": 1.5628954695013921, "grad_norm": 0.18033507466316223, "learning_rate": 1.1162206277977685e-05, "loss": 0.5034, "step": 6175 }, { "epoch": 1.5631485699822831, "grad_norm": 0.16673119366168976, "learning_rate": 1.1159828145909924e-05, "loss": 0.4848, "step": 6176 }, { "epoch": 1.563401670463174, "grad_norm": 0.16239602863788605, "learning_rate": 1.1157449947351676e-05, "loss": 0.5076, "step": 6177 }, { "epoch": 1.5636547709440647, "grad_norm": 0.16759084165096283, "learning_rate": 1.1155071682439274e-05, "loss": 0.4894, "step": 6178 }, { "epoch": 1.5639078714249557, "grad_norm": 0.1647527813911438, "learning_rate": 1.1152693351309065e-05, "loss": 0.4857, "step": 6179 }, { "epoch": 1.5641609719058467, "grad_norm": 0.16565260291099548, "learning_rate": 1.1150314954097392e-05, "loss": 0.4989, "step": 6180 }, { "epoch": 1.5644140723867377, "grad_norm": 0.16294240951538086, "learning_rate": 1.11479364909406e-05, "loss": 0.4854, "step": 6181 }, { "epoch": 1.5646671728676285, "grad_norm": 0.1603868156671524, "learning_rate": 1.1145557961975047e-05, "loss": 0.4931, "step": 6182 }, { "epoch": 1.5649202733485192, "grad_norm": 0.17370323836803436, "learning_rate": 1.1143179367337084e-05, "loss": 0.4685, "step": 6183 }, { "epoch": 1.5651733738294102, "grad_norm": 0.15968601405620575, "learning_rate": 1.1140800707163074e-05, "loss": 0.4942, "step": 6184 }, { "epoch": 1.5654264743103012, "grad_norm": 0.17598696053028107, "learning_rate": 1.113842198158938e-05, "loss": 0.5078, "step": 6185 }, { "epoch": 1.5656795747911922, "grad_norm": 0.16288626194000244, "learning_rate": 1.1136043190752368e-05, "loss": 0.4839, "step": 6186 }, { "epoch": 1.565932675272083, "grad_norm": 0.16555307805538177, "learning_rate": 1.113366433478841e-05, "loss": 0.481, "step": 6187 }, { "epoch": 1.5661857757529738, "grad_norm": 0.1661251336336136, "learning_rate": 1.113128541383388e-05, "loss": 0.5132, "step": 6188 }, { "epoch": 1.5664388762338648, "grad_norm": 0.1598391830921173, "learning_rate": 1.112890642802516e-05, "loss": 0.4644, "step": 6189 }, { "epoch": 1.5666919767147558, "grad_norm": 0.16757960617542267, "learning_rate": 1.1126527377498622e-05, "loss": 0.4833, "step": 6190 }, { "epoch": 1.5669450771956468, "grad_norm": 0.16494117677211761, "learning_rate": 1.1124148262390663e-05, "loss": 0.4793, "step": 6191 }, { "epoch": 1.5671981776765376, "grad_norm": 0.16768710315227509, "learning_rate": 1.1121769082837672e-05, "loss": 0.4642, "step": 6192 }, { "epoch": 1.5674512781574284, "grad_norm": 0.1616028994321823, "learning_rate": 1.1119389838976033e-05, "loss": 0.4782, "step": 6193 }, { "epoch": 1.5677043786383194, "grad_norm": 0.15757335722446442, "learning_rate": 1.1117010530942152e-05, "loss": 0.4743, "step": 6194 }, { "epoch": 1.5679574791192104, "grad_norm": 0.17572472989559174, "learning_rate": 1.1114631158872429e-05, "loss": 0.5223, "step": 6195 }, { "epoch": 1.5682105796001014, "grad_norm": 0.16187196969985962, "learning_rate": 1.1112251722903262e-05, "loss": 0.4666, "step": 6196 }, { "epoch": 1.5684636800809921, "grad_norm": 0.17140817642211914, "learning_rate": 1.1109872223171062e-05, "loss": 0.4806, "step": 6197 }, { "epoch": 1.568716780561883, "grad_norm": 0.1653946340084076, "learning_rate": 1.1107492659812248e-05, "loss": 0.5033, "step": 6198 }, { "epoch": 1.568969881042774, "grad_norm": 0.16876547038555145, "learning_rate": 1.1105113032963226e-05, "loss": 0.462, "step": 6199 }, { "epoch": 1.569222981523665, "grad_norm": 0.19149713218212128, "learning_rate": 1.1102733342760415e-05, "loss": 0.4831, "step": 6200 }, { "epoch": 1.569476082004556, "grad_norm": 0.17221638560295105, "learning_rate": 1.1100353589340247e-05, "loss": 0.4961, "step": 6201 }, { "epoch": 1.5697291824854467, "grad_norm": 0.16801275312900543, "learning_rate": 1.1097973772839137e-05, "loss": 0.4804, "step": 6202 }, { "epoch": 1.5699822829663377, "grad_norm": 0.16483886539936066, "learning_rate": 1.1095593893393522e-05, "loss": 0.4723, "step": 6203 }, { "epoch": 1.5702353834472285, "grad_norm": 0.1626865565776825, "learning_rate": 1.1093213951139832e-05, "loss": 0.484, "step": 6204 }, { "epoch": 1.5704884839281195, "grad_norm": 0.16570746898651123, "learning_rate": 1.1090833946214502e-05, "loss": 0.4716, "step": 6205 }, { "epoch": 1.5707415844090105, "grad_norm": 0.16090524196624756, "learning_rate": 1.108845387875398e-05, "loss": 0.4954, "step": 6206 }, { "epoch": 1.5709946848899012, "grad_norm": 0.1733119785785675, "learning_rate": 1.1086073748894705e-05, "loss": 0.4823, "step": 6207 }, { "epoch": 1.5712477853707922, "grad_norm": 0.16438797116279602, "learning_rate": 1.1083693556773124e-05, "loss": 0.5018, "step": 6208 }, { "epoch": 1.571500885851683, "grad_norm": 0.1608227789402008, "learning_rate": 1.1081313302525693e-05, "loss": 0.506, "step": 6209 }, { "epoch": 1.571753986332574, "grad_norm": 0.16070856153964996, "learning_rate": 1.1078932986288864e-05, "loss": 0.4907, "step": 6210 }, { "epoch": 1.572007086813465, "grad_norm": 0.16118626296520233, "learning_rate": 1.1076552608199094e-05, "loss": 0.4701, "step": 6211 }, { "epoch": 1.572260187294356, "grad_norm": 0.17201262712478638, "learning_rate": 1.1074172168392847e-05, "loss": 0.5051, "step": 6212 }, { "epoch": 1.5725132877752468, "grad_norm": 0.1664821207523346, "learning_rate": 1.1071791667006589e-05, "loss": 0.4886, "step": 6213 }, { "epoch": 1.5727663882561376, "grad_norm": 0.167556032538414, "learning_rate": 1.106941110417679e-05, "loss": 0.48, "step": 6214 }, { "epoch": 1.5730194887370286, "grad_norm": 0.16518545150756836, "learning_rate": 1.1067030480039915e-05, "loss": 0.4771, "step": 6215 }, { "epoch": 1.5732725892179196, "grad_norm": 0.15995338559150696, "learning_rate": 1.1064649794732449e-05, "loss": 0.4541, "step": 6216 }, { "epoch": 1.5735256896988106, "grad_norm": 0.16765834391117096, "learning_rate": 1.1062269048390873e-05, "loss": 0.4973, "step": 6217 }, { "epoch": 1.5737787901797013, "grad_norm": 0.1634901612997055, "learning_rate": 1.1059888241151662e-05, "loss": 0.4737, "step": 6218 }, { "epoch": 1.5740318906605921, "grad_norm": 0.16715526580810547, "learning_rate": 1.1057507373151305e-05, "loss": 0.4873, "step": 6219 }, { "epoch": 1.5742849911414831, "grad_norm": 0.16472309827804565, "learning_rate": 1.1055126444526295e-05, "loss": 0.4879, "step": 6220 }, { "epoch": 1.5745380916223741, "grad_norm": 0.15923823416233063, "learning_rate": 1.1052745455413124e-05, "loss": 0.4627, "step": 6221 }, { "epoch": 1.5747911921032651, "grad_norm": 0.16096077859401703, "learning_rate": 1.1050364405948287e-05, "loss": 0.4728, "step": 6222 }, { "epoch": 1.575044292584156, "grad_norm": 0.16527977585792542, "learning_rate": 1.104798329626829e-05, "loss": 0.4756, "step": 6223 }, { "epoch": 1.5752973930650467, "grad_norm": 0.163514643907547, "learning_rate": 1.1045602126509634e-05, "loss": 0.4949, "step": 6224 }, { "epoch": 1.5755504935459377, "grad_norm": 0.15919791162014008, "learning_rate": 1.1043220896808824e-05, "loss": 0.4796, "step": 6225 }, { "epoch": 1.5758035940268287, "grad_norm": 0.16422170400619507, "learning_rate": 1.1040839607302373e-05, "loss": 0.5094, "step": 6226 }, { "epoch": 1.5760566945077197, "grad_norm": 0.16527771949768066, "learning_rate": 1.1038458258126797e-05, "loss": 0.4891, "step": 6227 }, { "epoch": 1.5763097949886105, "grad_norm": 0.16578124463558197, "learning_rate": 1.1036076849418609e-05, "loss": 0.4888, "step": 6228 }, { "epoch": 1.5765628954695012, "grad_norm": 0.16984455287456512, "learning_rate": 1.1033695381314335e-05, "loss": 0.5004, "step": 6229 }, { "epoch": 1.5768159959503922, "grad_norm": 0.16444902122020721, "learning_rate": 1.1031313853950495e-05, "loss": 0.4936, "step": 6230 }, { "epoch": 1.5770690964312832, "grad_norm": 0.16528642177581787, "learning_rate": 1.1028932267463624e-05, "loss": 0.5085, "step": 6231 }, { "epoch": 1.5773221969121742, "grad_norm": 0.1666153073310852, "learning_rate": 1.1026550621990244e-05, "loss": 0.4809, "step": 6232 }, { "epoch": 1.577575297393065, "grad_norm": 0.1682671457529068, "learning_rate": 1.1024168917666897e-05, "loss": 0.4789, "step": 6233 }, { "epoch": 1.577828397873956, "grad_norm": 0.16203345358371735, "learning_rate": 1.102178715463012e-05, "loss": 0.5013, "step": 6234 }, { "epoch": 1.5780814983548468, "grad_norm": 0.16671410202980042, "learning_rate": 1.1019405333016448e-05, "loss": 0.483, "step": 6235 }, { "epoch": 1.5783345988357378, "grad_norm": 0.15953010320663452, "learning_rate": 1.1017023452962435e-05, "loss": 0.4733, "step": 6236 }, { "epoch": 1.5785876993166288, "grad_norm": 0.1652543544769287, "learning_rate": 1.1014641514604623e-05, "loss": 0.4845, "step": 6237 }, { "epoch": 1.5788407997975196, "grad_norm": 0.1754102110862732, "learning_rate": 1.1012259518079565e-05, "loss": 0.4871, "step": 6238 }, { "epoch": 1.5790939002784106, "grad_norm": 0.1658346951007843, "learning_rate": 1.100987746352382e-05, "loss": 0.4961, "step": 6239 }, { "epoch": 1.5793470007593013, "grad_norm": 0.16397303342819214, "learning_rate": 1.1007495351073937e-05, "loss": 0.4768, "step": 6240 }, { "epoch": 1.5796001012401923, "grad_norm": 0.16744524240493774, "learning_rate": 1.1005113180866482e-05, "loss": 0.5312, "step": 6241 }, { "epoch": 1.5798532017210833, "grad_norm": 0.16604554653167725, "learning_rate": 1.1002730953038027e-05, "loss": 0.4879, "step": 6242 }, { "epoch": 1.5801063022019743, "grad_norm": 0.1651114821434021, "learning_rate": 1.1000348667725129e-05, "loss": 0.497, "step": 6243 }, { "epoch": 1.5803594026828651, "grad_norm": 0.162980318069458, "learning_rate": 1.0997966325064363e-05, "loss": 0.4619, "step": 6244 }, { "epoch": 1.580612503163756, "grad_norm": 0.16702976822853088, "learning_rate": 1.0995583925192308e-05, "loss": 0.4975, "step": 6245 }, { "epoch": 1.580865603644647, "grad_norm": 0.1674250364303589, "learning_rate": 1.0993201468245535e-05, "loss": 0.4818, "step": 6246 }, { "epoch": 1.581118704125538, "grad_norm": 0.1636473387479782, "learning_rate": 1.099081895436063e-05, "loss": 0.4787, "step": 6247 }, { "epoch": 1.581371804606429, "grad_norm": 0.16683930158615112, "learning_rate": 1.0988436383674176e-05, "loss": 0.4872, "step": 6248 }, { "epoch": 1.5816249050873197, "grad_norm": 0.16439498960971832, "learning_rate": 1.098605375632276e-05, "loss": 0.4605, "step": 6249 }, { "epoch": 1.5818780055682105, "grad_norm": 0.16893959045410156, "learning_rate": 1.0983671072442976e-05, "loss": 0.4771, "step": 6250 }, { "epoch": 1.5821311060491015, "grad_norm": 0.1641794592142105, "learning_rate": 1.0981288332171416e-05, "loss": 0.5004, "step": 6251 }, { "epoch": 1.5823842065299925, "grad_norm": 0.16201798617839813, "learning_rate": 1.0978905535644675e-05, "loss": 0.4606, "step": 6252 }, { "epoch": 1.5826373070108835, "grad_norm": 0.16853663325309753, "learning_rate": 1.0976522682999358e-05, "loss": 0.4717, "step": 6253 }, { "epoch": 1.5828904074917742, "grad_norm": 0.1579788327217102, "learning_rate": 1.0974139774372066e-05, "loss": 0.4743, "step": 6254 }, { "epoch": 1.583143507972665, "grad_norm": 0.16746573150157928, "learning_rate": 1.0971756809899408e-05, "loss": 0.4997, "step": 6255 }, { "epoch": 1.583396608453556, "grad_norm": 0.16384664177894592, "learning_rate": 1.0969373789717992e-05, "loss": 0.4822, "step": 6256 }, { "epoch": 1.583649708934447, "grad_norm": 0.17878137528896332, "learning_rate": 1.0966990713964433e-05, "loss": 0.4737, "step": 6257 }, { "epoch": 1.583902809415338, "grad_norm": 0.15912462770938873, "learning_rate": 1.0964607582775348e-05, "loss": 0.479, "step": 6258 }, { "epoch": 1.5841559098962288, "grad_norm": 0.16260462999343872, "learning_rate": 1.096222439628736e-05, "loss": 0.4935, "step": 6259 }, { "epoch": 1.5844090103771196, "grad_norm": 0.168402299284935, "learning_rate": 1.0959841154637083e-05, "loss": 0.4961, "step": 6260 }, { "epoch": 1.5846621108580106, "grad_norm": 0.16306819021701813, "learning_rate": 1.0957457857961153e-05, "loss": 0.5017, "step": 6261 }, { "epoch": 1.5849152113389016, "grad_norm": 0.17029763758182526, "learning_rate": 1.0955074506396193e-05, "loss": 0.4976, "step": 6262 }, { "epoch": 1.5851683118197926, "grad_norm": 0.1673326939344406, "learning_rate": 1.0952691100078836e-05, "loss": 0.5031, "step": 6263 }, { "epoch": 1.5854214123006833, "grad_norm": 0.16312293708324432, "learning_rate": 1.0950307639145724e-05, "loss": 0.4825, "step": 6264 }, { "epoch": 1.5856745127815741, "grad_norm": 0.1609962284564972, "learning_rate": 1.0947924123733487e-05, "loss": 0.4956, "step": 6265 }, { "epoch": 1.5859276132624651, "grad_norm": 0.16509892046451569, "learning_rate": 1.0945540553978768e-05, "loss": 0.4958, "step": 6266 }, { "epoch": 1.5861807137433561, "grad_norm": 0.16557718813419342, "learning_rate": 1.0943156930018219e-05, "loss": 0.4938, "step": 6267 }, { "epoch": 1.5864338142242471, "grad_norm": 0.16349440813064575, "learning_rate": 1.0940773251988483e-05, "loss": 0.4794, "step": 6268 }, { "epoch": 1.586686914705138, "grad_norm": 0.16202279925346375, "learning_rate": 1.0938389520026212e-05, "loss": 0.4925, "step": 6269 }, { "epoch": 1.586940015186029, "grad_norm": 0.16478179395198822, "learning_rate": 1.093600573426806e-05, "loss": 0.4832, "step": 6270 }, { "epoch": 1.5871931156669197, "grad_norm": 0.15921510756015778, "learning_rate": 1.0933621894850685e-05, "loss": 0.4939, "step": 6271 }, { "epoch": 1.5874462161478107, "grad_norm": 0.17064759135246277, "learning_rate": 1.0931238001910749e-05, "loss": 0.4895, "step": 6272 }, { "epoch": 1.5876993166287017, "grad_norm": 0.16491150856018066, "learning_rate": 1.0928854055584911e-05, "loss": 0.4664, "step": 6273 }, { "epoch": 1.5879524171095927, "grad_norm": 0.16853758692741394, "learning_rate": 1.0926470056009841e-05, "loss": 0.4953, "step": 6274 }, { "epoch": 1.5882055175904835, "grad_norm": 0.16642262041568756, "learning_rate": 1.092408600332221e-05, "loss": 0.4906, "step": 6275 }, { "epoch": 1.5884586180713742, "grad_norm": 0.16874180734157562, "learning_rate": 1.0921701897658685e-05, "loss": 0.4638, "step": 6276 }, { "epoch": 1.5887117185522652, "grad_norm": 0.1667145937681198, "learning_rate": 1.0919317739155948e-05, "loss": 0.4959, "step": 6277 }, { "epoch": 1.5889648190331562, "grad_norm": 0.16696839034557343, "learning_rate": 1.0916933527950675e-05, "loss": 0.4744, "step": 6278 }, { "epoch": 1.5892179195140472, "grad_norm": 0.16279910504817963, "learning_rate": 1.0914549264179547e-05, "loss": 0.4797, "step": 6279 }, { "epoch": 1.589471019994938, "grad_norm": 0.1634509116411209, "learning_rate": 1.091216494797925e-05, "loss": 0.4789, "step": 6280 }, { "epoch": 1.5897241204758288, "grad_norm": 0.16976231336593628, "learning_rate": 1.0909780579486473e-05, "loss": 0.4848, "step": 6281 }, { "epoch": 1.5899772209567198, "grad_norm": 0.16900351643562317, "learning_rate": 1.0907396158837903e-05, "loss": 0.4603, "step": 6282 }, { "epoch": 1.5902303214376108, "grad_norm": 0.1664009541273117, "learning_rate": 1.0905011686170235e-05, "loss": 0.4826, "step": 6283 }, { "epoch": 1.5904834219185018, "grad_norm": 0.16209569573402405, "learning_rate": 1.0902627161620171e-05, "loss": 0.496, "step": 6284 }, { "epoch": 1.5907365223993926, "grad_norm": 0.16794581711292267, "learning_rate": 1.0900242585324401e-05, "loss": 0.4923, "step": 6285 }, { "epoch": 1.5909896228802833, "grad_norm": 0.1635778248310089, "learning_rate": 1.089785795741964e-05, "loss": 0.4876, "step": 6286 }, { "epoch": 1.5912427233611743, "grad_norm": 0.15879647433757782, "learning_rate": 1.0895473278042584e-05, "loss": 0.46, "step": 6287 }, { "epoch": 1.5914958238420653, "grad_norm": 0.16084404289722443, "learning_rate": 1.089308854732994e-05, "loss": 0.4729, "step": 6288 }, { "epoch": 1.5917489243229563, "grad_norm": 0.160660520195961, "learning_rate": 1.0890703765418428e-05, "loss": 0.4657, "step": 6289 }, { "epoch": 1.5920020248038471, "grad_norm": 0.1626483052968979, "learning_rate": 1.088831893244476e-05, "loss": 0.5038, "step": 6290 }, { "epoch": 1.592255125284738, "grad_norm": 0.1880364716053009, "learning_rate": 1.0885934048545649e-05, "loss": 0.4976, "step": 6291 }, { "epoch": 1.592508225765629, "grad_norm": 0.16723693907260895, "learning_rate": 1.0883549113857819e-05, "loss": 0.4877, "step": 6292 }, { "epoch": 1.59276132624652, "grad_norm": 0.1687256097793579, "learning_rate": 1.0881164128517993e-05, "loss": 0.507, "step": 6293 }, { "epoch": 1.593014426727411, "grad_norm": 0.16306963562965393, "learning_rate": 1.0878779092662896e-05, "loss": 0.4869, "step": 6294 }, { "epoch": 1.5932675272083017, "grad_norm": 0.16294921934604645, "learning_rate": 1.0876394006429258e-05, "loss": 0.4868, "step": 6295 }, { "epoch": 1.5935206276891924, "grad_norm": 0.1756923347711563, "learning_rate": 1.0874008869953811e-05, "loss": 0.4835, "step": 6296 }, { "epoch": 1.5937737281700834, "grad_norm": 0.1615855097770691, "learning_rate": 1.087162368337329e-05, "loss": 0.4485, "step": 6297 }, { "epoch": 1.5940268286509744, "grad_norm": 0.16196033358573914, "learning_rate": 1.0869238446824433e-05, "loss": 0.4624, "step": 6298 }, { "epoch": 1.5942799291318654, "grad_norm": 0.16319462656974792, "learning_rate": 1.0866853160443977e-05, "loss": 0.4902, "step": 6299 }, { "epoch": 1.5945330296127562, "grad_norm": 0.16566620767116547, "learning_rate": 1.086446782436867e-05, "loss": 0.4679, "step": 6300 }, { "epoch": 1.5947861300936472, "grad_norm": 0.1640174388885498, "learning_rate": 1.0862082438735255e-05, "loss": 0.4812, "step": 6301 }, { "epoch": 1.595039230574538, "grad_norm": 0.1637568175792694, "learning_rate": 1.0859697003680482e-05, "loss": 0.4925, "step": 6302 }, { "epoch": 1.595292331055429, "grad_norm": 0.16560834646224976, "learning_rate": 1.085731151934111e-05, "loss": 0.5008, "step": 6303 }, { "epoch": 1.59554543153632, "grad_norm": 0.16961799561977386, "learning_rate": 1.0854925985853878e-05, "loss": 0.4848, "step": 6304 }, { "epoch": 1.5957985320172108, "grad_norm": 0.1607006937265396, "learning_rate": 1.0852540403355556e-05, "loss": 0.4822, "step": 6305 }, { "epoch": 1.5960516324981018, "grad_norm": 0.1760634481906891, "learning_rate": 1.0850154771982903e-05, "loss": 0.4909, "step": 6306 }, { "epoch": 1.5963047329789926, "grad_norm": 0.16456623375415802, "learning_rate": 1.0847769091872678e-05, "loss": 0.4784, "step": 6307 }, { "epoch": 1.5965578334598836, "grad_norm": 0.17230308055877686, "learning_rate": 1.084538336316165e-05, "loss": 0.5184, "step": 6308 }, { "epoch": 1.5968109339407746, "grad_norm": 0.1627141833305359, "learning_rate": 1.084299758598659e-05, "loss": 0.4775, "step": 6309 }, { "epoch": 1.5970640344216656, "grad_norm": 0.16715025901794434, "learning_rate": 1.084061176048426e-05, "loss": 0.4719, "step": 6310 }, { "epoch": 1.5973171349025563, "grad_norm": 0.16247397661209106, "learning_rate": 1.083822588679145e-05, "loss": 0.4741, "step": 6311 }, { "epoch": 1.5975702353834471, "grad_norm": 0.17189329862594604, "learning_rate": 1.083583996504492e-05, "loss": 0.4815, "step": 6312 }, { "epoch": 1.5978233358643381, "grad_norm": 0.16352054476737976, "learning_rate": 1.083345399538146e-05, "loss": 0.4663, "step": 6313 }, { "epoch": 1.5980764363452291, "grad_norm": 0.16255415976047516, "learning_rate": 1.0831067977937852e-05, "loss": 0.4912, "step": 6314 }, { "epoch": 1.5983295368261201, "grad_norm": 0.1605660319328308, "learning_rate": 1.082868191285088e-05, "loss": 0.475, "step": 6315 }, { "epoch": 1.598582637307011, "grad_norm": 0.16437660157680511, "learning_rate": 1.082629580025733e-05, "loss": 0.4999, "step": 6316 }, { "epoch": 1.5988357377879017, "grad_norm": 0.1642684042453766, "learning_rate": 1.0823909640293995e-05, "loss": 0.4864, "step": 6317 }, { "epoch": 1.5990888382687927, "grad_norm": 0.1635955274105072, "learning_rate": 1.0821523433097667e-05, "loss": 0.486, "step": 6318 }, { "epoch": 1.5993419387496837, "grad_norm": 0.15988510847091675, "learning_rate": 1.0819137178805146e-05, "loss": 0.5284, "step": 6319 }, { "epoch": 1.5995950392305747, "grad_norm": 0.16159094870090485, "learning_rate": 1.0816750877553226e-05, "loss": 0.4612, "step": 6320 }, { "epoch": 1.5998481397114654, "grad_norm": 0.1594904512166977, "learning_rate": 1.0814364529478712e-05, "loss": 0.4739, "step": 6321 }, { "epoch": 1.6001012401923562, "grad_norm": 0.16472937166690826, "learning_rate": 1.0811978134718406e-05, "loss": 0.4782, "step": 6322 }, { "epoch": 1.6003543406732472, "grad_norm": 0.16599130630493164, "learning_rate": 1.0809591693409115e-05, "loss": 0.4809, "step": 6323 }, { "epoch": 1.6006074411541382, "grad_norm": 0.16239234805107117, "learning_rate": 1.0807205205687652e-05, "loss": 0.515, "step": 6324 }, { "epoch": 1.6008605416350292, "grad_norm": 0.1671607345342636, "learning_rate": 1.0804818671690826e-05, "loss": 0.4645, "step": 6325 }, { "epoch": 1.60111364211592, "grad_norm": 0.1881597340106964, "learning_rate": 1.080243209155545e-05, "loss": 0.489, "step": 6326 }, { "epoch": 1.6013667425968108, "grad_norm": 0.16736231744289398, "learning_rate": 1.0800045465418348e-05, "loss": 0.4925, "step": 6327 }, { "epoch": 1.6016198430777018, "grad_norm": 0.16707713901996613, "learning_rate": 1.0797658793416336e-05, "loss": 0.4832, "step": 6328 }, { "epoch": 1.6018729435585928, "grad_norm": 0.1681615263223648, "learning_rate": 1.0795272075686234e-05, "loss": 0.4964, "step": 6329 }, { "epoch": 1.6021260440394838, "grad_norm": 0.1636541187763214, "learning_rate": 1.0792885312364874e-05, "loss": 0.501, "step": 6330 }, { "epoch": 1.6023791445203746, "grad_norm": 0.1618395894765854, "learning_rate": 1.0790498503589082e-05, "loss": 0.4826, "step": 6331 }, { "epoch": 1.6026322450012656, "grad_norm": 0.16339904069900513, "learning_rate": 1.0788111649495685e-05, "loss": 0.4999, "step": 6332 }, { "epoch": 1.6028853454821563, "grad_norm": 0.16121144592761993, "learning_rate": 1.078572475022152e-05, "loss": 0.5133, "step": 6333 }, { "epoch": 1.6031384459630473, "grad_norm": 0.16609080135822296, "learning_rate": 1.0783337805903425e-05, "loss": 0.481, "step": 6334 }, { "epoch": 1.6033915464439383, "grad_norm": 0.16675537824630737, "learning_rate": 1.0780950816678231e-05, "loss": 0.5309, "step": 6335 }, { "epoch": 1.603644646924829, "grad_norm": 0.17047196626663208, "learning_rate": 1.0778563782682783e-05, "loss": 0.5076, "step": 6336 }, { "epoch": 1.60389774740572, "grad_norm": 0.17192555963993073, "learning_rate": 1.0776176704053926e-05, "loss": 0.4949, "step": 6337 }, { "epoch": 1.6041508478866109, "grad_norm": 0.15998965501785278, "learning_rate": 1.0773789580928506e-05, "loss": 0.4987, "step": 6338 }, { "epoch": 1.6044039483675019, "grad_norm": 0.16549518704414368, "learning_rate": 1.077140241344337e-05, "loss": 0.4789, "step": 6339 }, { "epoch": 1.6046570488483929, "grad_norm": 0.15913161635398865, "learning_rate": 1.076901520173537e-05, "loss": 0.4982, "step": 6340 }, { "epoch": 1.6049101493292839, "grad_norm": 0.16498547792434692, "learning_rate": 1.0766627945941359e-05, "loss": 0.487, "step": 6341 }, { "epoch": 1.6051632498101747, "grad_norm": 0.16891741752624512, "learning_rate": 1.0764240646198193e-05, "loss": 0.493, "step": 6342 }, { "epoch": 1.6054163502910654, "grad_norm": 0.16785457730293274, "learning_rate": 1.0761853302642735e-05, "loss": 0.4598, "step": 6343 }, { "epoch": 1.6056694507719564, "grad_norm": 0.16510549187660217, "learning_rate": 1.0759465915411842e-05, "loss": 0.4959, "step": 6344 }, { "epoch": 1.6059225512528474, "grad_norm": 0.16433009505271912, "learning_rate": 1.0757078484642378e-05, "loss": 0.4655, "step": 6345 }, { "epoch": 1.6061756517337384, "grad_norm": 0.1626681089401245, "learning_rate": 1.0754691010471212e-05, "loss": 0.4838, "step": 6346 }, { "epoch": 1.6064287522146292, "grad_norm": 0.3393290638923645, "learning_rate": 1.0752303493035212e-05, "loss": 0.4712, "step": 6347 }, { "epoch": 1.60668185269552, "grad_norm": 0.16784979403018951, "learning_rate": 1.0749915932471244e-05, "loss": 0.4842, "step": 6348 }, { "epoch": 1.606934953176411, "grad_norm": 0.16649964451789856, "learning_rate": 1.074752832891619e-05, "loss": 0.4798, "step": 6349 }, { "epoch": 1.607188053657302, "grad_norm": 0.503620982170105, "learning_rate": 1.0745140682506925e-05, "loss": 0.5246, "step": 6350 }, { "epoch": 1.607441154138193, "grad_norm": 0.1659390777349472, "learning_rate": 1.0742752993380319e-05, "loss": 0.4891, "step": 6351 }, { "epoch": 1.6076942546190838, "grad_norm": 0.16889096796512604, "learning_rate": 1.0740365261673262e-05, "loss": 0.4917, "step": 6352 }, { "epoch": 1.6079473550999746, "grad_norm": 0.168401837348938, "learning_rate": 1.073797748752264e-05, "loss": 0.4835, "step": 6353 }, { "epoch": 1.6082004555808656, "grad_norm": 0.1613926887512207, "learning_rate": 1.0735589671065328e-05, "loss": 0.4812, "step": 6354 }, { "epoch": 1.6084535560617566, "grad_norm": 0.1640220433473587, "learning_rate": 1.0733201812438221e-05, "loss": 0.464, "step": 6355 }, { "epoch": 1.6087066565426476, "grad_norm": 0.17248466610908508, "learning_rate": 1.0730813911778212e-05, "loss": 0.468, "step": 6356 }, { "epoch": 1.6089597570235383, "grad_norm": 0.17084766924381256, "learning_rate": 1.0728425969222192e-05, "loss": 0.504, "step": 6357 }, { "epoch": 1.609212857504429, "grad_norm": 0.1591729372739792, "learning_rate": 1.0726037984907051e-05, "loss": 0.5026, "step": 6358 }, { "epoch": 1.60946595798532, "grad_norm": 0.16293412446975708, "learning_rate": 1.0723649958969699e-05, "loss": 0.4843, "step": 6359 }, { "epoch": 1.609719058466211, "grad_norm": 0.1637086421251297, "learning_rate": 1.0721261891547028e-05, "loss": 0.4636, "step": 6360 }, { "epoch": 1.609972158947102, "grad_norm": 0.1691388040781021, "learning_rate": 1.071887378277594e-05, "loss": 0.4752, "step": 6361 }, { "epoch": 1.6102252594279929, "grad_norm": 0.1638038158416748, "learning_rate": 1.0716485632793345e-05, "loss": 0.4977, "step": 6362 }, { "epoch": 1.6104783599088837, "grad_norm": 0.16804978251457214, "learning_rate": 1.0714097441736149e-05, "loss": 0.4658, "step": 6363 }, { "epoch": 1.6107314603897747, "grad_norm": 0.16412217915058136, "learning_rate": 1.071170920974126e-05, "loss": 0.4806, "step": 6364 }, { "epoch": 1.6109845608706657, "grad_norm": 0.163789764046669, "learning_rate": 1.0709320936945594e-05, "loss": 0.483, "step": 6365 }, { "epoch": 1.6112376613515567, "grad_norm": 0.16028600931167603, "learning_rate": 1.0706932623486062e-05, "loss": 0.5012, "step": 6366 }, { "epoch": 1.6114907618324474, "grad_norm": 0.16240644454956055, "learning_rate": 1.0704544269499581e-05, "loss": 0.472, "step": 6367 }, { "epoch": 1.6117438623133384, "grad_norm": 0.15985435247421265, "learning_rate": 1.0702155875123073e-05, "loss": 0.477, "step": 6368 }, { "epoch": 1.6119969627942292, "grad_norm": 0.17111191153526306, "learning_rate": 1.0699767440493459e-05, "loss": 0.4993, "step": 6369 }, { "epoch": 1.6122500632751202, "grad_norm": 0.1670607328414917, "learning_rate": 1.069737896574766e-05, "loss": 0.4842, "step": 6370 }, { "epoch": 1.6125031637560112, "grad_norm": 0.16078349947929382, "learning_rate": 1.0694990451022604e-05, "loss": 0.4907, "step": 6371 }, { "epoch": 1.6127562642369022, "grad_norm": 0.16775591671466827, "learning_rate": 1.0692601896455223e-05, "loss": 0.4911, "step": 6372 }, { "epoch": 1.613009364717793, "grad_norm": 0.16801805794239044, "learning_rate": 1.069021330218244e-05, "loss": 0.4936, "step": 6373 }, { "epoch": 1.6132624651986838, "grad_norm": 0.1634770631790161, "learning_rate": 1.0687824668341193e-05, "loss": 0.5084, "step": 6374 }, { "epoch": 1.6135155656795748, "grad_norm": 0.16892895102500916, "learning_rate": 1.0685435995068423e-05, "loss": 0.4923, "step": 6375 }, { "epoch": 1.6137686661604658, "grad_norm": 0.16627852618694305, "learning_rate": 1.0683047282501054e-05, "loss": 0.5046, "step": 6376 }, { "epoch": 1.6140217666413568, "grad_norm": 0.17879649996757507, "learning_rate": 1.0680658530776037e-05, "loss": 0.4757, "step": 6377 }, { "epoch": 1.6142748671222475, "grad_norm": 0.16169127821922302, "learning_rate": 1.067826974003031e-05, "loss": 0.4885, "step": 6378 }, { "epoch": 1.6145279676031383, "grad_norm": 0.16141067445278168, "learning_rate": 1.0675880910400816e-05, "loss": 0.4695, "step": 6379 }, { "epoch": 1.6147810680840293, "grad_norm": 0.16324187815189362, "learning_rate": 1.0673492042024503e-05, "loss": 0.4765, "step": 6380 }, { "epoch": 1.6150341685649203, "grad_norm": 0.19916489720344543, "learning_rate": 1.0671103135038323e-05, "loss": 0.4768, "step": 6381 }, { "epoch": 1.6152872690458113, "grad_norm": 0.1603168547153473, "learning_rate": 1.0668714189579221e-05, "loss": 0.4757, "step": 6382 }, { "epoch": 1.615540369526702, "grad_norm": 0.16187147796154022, "learning_rate": 1.0666325205784154e-05, "loss": 0.4658, "step": 6383 }, { "epoch": 1.6157934700075929, "grad_norm": 0.16598135232925415, "learning_rate": 1.0663936183790076e-05, "loss": 0.4596, "step": 6384 }, { "epoch": 1.6160465704884839, "grad_norm": 0.16725674271583557, "learning_rate": 1.0661547123733944e-05, "loss": 0.4732, "step": 6385 }, { "epoch": 1.6162996709693749, "grad_norm": 0.16400107741355896, "learning_rate": 1.065915802575272e-05, "loss": 0.4923, "step": 6386 }, { "epoch": 1.6165527714502659, "grad_norm": 0.16227442026138306, "learning_rate": 1.0656768889983364e-05, "loss": 0.4897, "step": 6387 }, { "epoch": 1.6168058719311567, "grad_norm": 0.16491149365901947, "learning_rate": 1.0654379716562841e-05, "loss": 0.4834, "step": 6388 }, { "epoch": 1.6170589724120474, "grad_norm": 0.16584186255931854, "learning_rate": 1.0651990505628119e-05, "loss": 0.5055, "step": 6389 }, { "epoch": 1.6173120728929384, "grad_norm": 0.15855951607227325, "learning_rate": 1.064960125731616e-05, "loss": 0.4971, "step": 6390 }, { "epoch": 1.6175651733738294, "grad_norm": 0.16936393082141876, "learning_rate": 1.0647211971763941e-05, "loss": 0.4898, "step": 6391 }, { "epoch": 1.6178182738547204, "grad_norm": 0.1614636331796646, "learning_rate": 1.0644822649108433e-05, "loss": 0.494, "step": 6392 }, { "epoch": 1.6180713743356112, "grad_norm": 0.16062872111797333, "learning_rate": 1.0642433289486612e-05, "loss": 0.4817, "step": 6393 }, { "epoch": 1.618324474816502, "grad_norm": 0.1631610095500946, "learning_rate": 1.064004389303545e-05, "loss": 0.4831, "step": 6394 }, { "epoch": 1.618577575297393, "grad_norm": 0.1638704538345337, "learning_rate": 1.0637654459891932e-05, "loss": 0.4698, "step": 6395 }, { "epoch": 1.618830675778284, "grad_norm": 0.1718064844608307, "learning_rate": 1.0635264990193033e-05, "loss": 0.4642, "step": 6396 }, { "epoch": 1.619083776259175, "grad_norm": 0.17727258801460266, "learning_rate": 1.0632875484075744e-05, "loss": 0.4802, "step": 6397 }, { "epoch": 1.6193368767400658, "grad_norm": 0.17070575058460236, "learning_rate": 1.0630485941677043e-05, "loss": 0.4667, "step": 6398 }, { "epoch": 1.6195899772209568, "grad_norm": 0.1630602329969406, "learning_rate": 1.0628096363133919e-05, "loss": 0.504, "step": 6399 }, { "epoch": 1.6198430777018475, "grad_norm": 0.1623746007680893, "learning_rate": 1.0625706748583369e-05, "loss": 0.4853, "step": 6400 }, { "epoch": 1.6200961781827385, "grad_norm": 0.16590110957622528, "learning_rate": 1.0623317098162374e-05, "loss": 0.4872, "step": 6401 }, { "epoch": 1.6203492786636295, "grad_norm": 0.16285094618797302, "learning_rate": 1.062092741200793e-05, "loss": 0.4964, "step": 6402 }, { "epoch": 1.6206023791445203, "grad_norm": 0.16383999586105347, "learning_rate": 1.0618537690257038e-05, "loss": 0.4812, "step": 6403 }, { "epoch": 1.6208554796254113, "grad_norm": 0.16003647446632385, "learning_rate": 1.0616147933046692e-05, "loss": 0.4875, "step": 6404 }, { "epoch": 1.621108580106302, "grad_norm": 0.16631536185741425, "learning_rate": 1.0613758140513887e-05, "loss": 0.5118, "step": 6405 }, { "epoch": 1.621361680587193, "grad_norm": 0.1661251038312912, "learning_rate": 1.0611368312795638e-05, "loss": 0.4951, "step": 6406 }, { "epoch": 1.621614781068084, "grad_norm": 0.16541333496570587, "learning_rate": 1.0608978450028938e-05, "loss": 0.492, "step": 6407 }, { "epoch": 1.621867881548975, "grad_norm": 0.1628897339105606, "learning_rate": 1.0606588552350791e-05, "loss": 0.5041, "step": 6408 }, { "epoch": 1.6221209820298659, "grad_norm": 0.15881727635860443, "learning_rate": 1.0604198619898212e-05, "loss": 0.4528, "step": 6409 }, { "epoch": 1.6223740825107567, "grad_norm": 0.16560178995132446, "learning_rate": 1.0601808652808207e-05, "loss": 0.4777, "step": 6410 }, { "epoch": 1.6226271829916477, "grad_norm": 0.16145041584968567, "learning_rate": 1.059941865121779e-05, "loss": 0.4669, "step": 6411 }, { "epoch": 1.6228802834725387, "grad_norm": 0.17241038382053375, "learning_rate": 1.059702861526397e-05, "loss": 0.484, "step": 6412 }, { "epoch": 1.6231333839534297, "grad_norm": 0.1647508144378662, "learning_rate": 1.0594638545083771e-05, "loss": 0.4827, "step": 6413 }, { "epoch": 1.6233864844343204, "grad_norm": 0.18377847969532013, "learning_rate": 1.05922484408142e-05, "loss": 0.4766, "step": 6414 }, { "epoch": 1.6236395849152112, "grad_norm": 0.16230468451976776, "learning_rate": 1.0589858302592287e-05, "loss": 0.4838, "step": 6415 }, { "epoch": 1.6238926853961022, "grad_norm": 0.16728287935256958, "learning_rate": 1.0587468130555046e-05, "loss": 0.4953, "step": 6416 }, { "epoch": 1.6241457858769932, "grad_norm": 0.16780558228492737, "learning_rate": 1.0585077924839503e-05, "loss": 0.46, "step": 6417 }, { "epoch": 1.6243988863578842, "grad_norm": 0.1646891087293625, "learning_rate": 1.0582687685582686e-05, "loss": 0.4713, "step": 6418 }, { "epoch": 1.624651986838775, "grad_norm": 0.16874434053897858, "learning_rate": 1.058029741292162e-05, "loss": 0.4739, "step": 6419 }, { "epoch": 1.6249050873196658, "grad_norm": 0.1634087711572647, "learning_rate": 1.0577907106993333e-05, "loss": 0.4805, "step": 6420 }, { "epoch": 1.6251581878005568, "grad_norm": 0.16411112248897552, "learning_rate": 1.0575516767934855e-05, "loss": 0.4821, "step": 6421 }, { "epoch": 1.6254112882814478, "grad_norm": 0.16301527619361877, "learning_rate": 1.0573126395883226e-05, "loss": 0.4772, "step": 6422 }, { "epoch": 1.6256643887623388, "grad_norm": 0.16700364649295807, "learning_rate": 1.0570735990975474e-05, "loss": 0.4869, "step": 6423 }, { "epoch": 1.6259174892432295, "grad_norm": 0.16590312123298645, "learning_rate": 1.0568345553348638e-05, "loss": 0.4851, "step": 6424 }, { "epoch": 1.6261705897241203, "grad_norm": 0.16520194709300995, "learning_rate": 1.0565955083139759e-05, "loss": 0.4913, "step": 6425 }, { "epoch": 1.6264236902050113, "grad_norm": 0.1683422476053238, "learning_rate": 1.0563564580485875e-05, "loss": 0.4633, "step": 6426 }, { "epoch": 1.6266767906859023, "grad_norm": 0.16467171907424927, "learning_rate": 1.0561174045524025e-05, "loss": 0.4809, "step": 6427 }, { "epoch": 1.6269298911667933, "grad_norm": 0.15941908955574036, "learning_rate": 1.0558783478391264e-05, "loss": 0.4699, "step": 6428 }, { "epoch": 1.627182991647684, "grad_norm": 0.16698838770389557, "learning_rate": 1.0556392879224627e-05, "loss": 0.5003, "step": 6429 }, { "epoch": 1.627436092128575, "grad_norm": 0.168549582362175, "learning_rate": 1.0554002248161166e-05, "loss": 0.4913, "step": 6430 }, { "epoch": 1.6276891926094659, "grad_norm": 0.16628582775592804, "learning_rate": 1.0551611585337933e-05, "loss": 0.4956, "step": 6431 }, { "epoch": 1.6279422930903569, "grad_norm": 0.164076566696167, "learning_rate": 1.0549220890891978e-05, "loss": 0.497, "step": 6432 }, { "epoch": 1.6281953935712479, "grad_norm": 0.16330455243587494, "learning_rate": 1.0546830164960355e-05, "loss": 0.4793, "step": 6433 }, { "epoch": 1.6284484940521387, "grad_norm": 0.1651241034269333, "learning_rate": 1.0544439407680119e-05, "loss": 0.4909, "step": 6434 }, { "epoch": 1.6287015945330297, "grad_norm": 0.16546548902988434, "learning_rate": 1.0542048619188323e-05, "loss": 0.5125, "step": 6435 }, { "epoch": 1.6289546950139204, "grad_norm": 0.18306930363178253, "learning_rate": 1.0539657799622034e-05, "loss": 0.4869, "step": 6436 }, { "epoch": 1.6292077954948114, "grad_norm": 0.17738115787506104, "learning_rate": 1.0537266949118305e-05, "loss": 0.4993, "step": 6437 }, { "epoch": 1.6294608959757024, "grad_norm": 0.17014524340629578, "learning_rate": 1.0534876067814202e-05, "loss": 0.4828, "step": 6438 }, { "epoch": 1.6297139964565934, "grad_norm": 0.16328056156635284, "learning_rate": 1.053248515584679e-05, "loss": 0.4848, "step": 6439 }, { "epoch": 1.6299670969374842, "grad_norm": 0.1621844470500946, "learning_rate": 1.0530094213353132e-05, "loss": 0.507, "step": 6440 }, { "epoch": 1.630220197418375, "grad_norm": 0.1597716361284256, "learning_rate": 1.0527703240470298e-05, "loss": 0.4583, "step": 6441 }, { "epoch": 1.630473297899266, "grad_norm": 0.163211390376091, "learning_rate": 1.0525312237335359e-05, "loss": 0.4663, "step": 6442 }, { "epoch": 1.630726398380157, "grad_norm": 0.15889106690883636, "learning_rate": 1.0522921204085378e-05, "loss": 0.4627, "step": 6443 }, { "epoch": 1.630979498861048, "grad_norm": 0.16469278931617737, "learning_rate": 1.052053014085744e-05, "loss": 0.4744, "step": 6444 }, { "epoch": 1.6312325993419388, "grad_norm": 0.1654670238494873, "learning_rate": 1.051813904778861e-05, "loss": 0.4869, "step": 6445 }, { "epoch": 1.6314856998228295, "grad_norm": 0.16368399560451508, "learning_rate": 1.0515747925015964e-05, "loss": 0.4618, "step": 6446 }, { "epoch": 1.6317388003037205, "grad_norm": 0.17221558094024658, "learning_rate": 1.051335677267659e-05, "loss": 0.4756, "step": 6447 }, { "epoch": 1.6319919007846115, "grad_norm": 0.1608695536851883, "learning_rate": 1.0510965590907557e-05, "loss": 0.4819, "step": 6448 }, { "epoch": 1.6322450012655025, "grad_norm": 0.1601473093032837, "learning_rate": 1.050857437984595e-05, "loss": 0.4603, "step": 6449 }, { "epoch": 1.6324981017463933, "grad_norm": 0.1605224311351776, "learning_rate": 1.0506183139628856e-05, "loss": 0.4501, "step": 6450 }, { "epoch": 1.632751202227284, "grad_norm": 0.16206097602844238, "learning_rate": 1.0503791870393357e-05, "loss": 0.4492, "step": 6451 }, { "epoch": 1.633004302708175, "grad_norm": 0.16135594248771667, "learning_rate": 1.0501400572276532e-05, "loss": 0.5043, "step": 6452 }, { "epoch": 1.633257403189066, "grad_norm": 0.16312934458255768, "learning_rate": 1.0499009245415484e-05, "loss": 0.4557, "step": 6453 }, { "epoch": 1.633510503669957, "grad_norm": 0.15932174026966095, "learning_rate": 1.0496617889947289e-05, "loss": 0.4577, "step": 6454 }, { "epoch": 1.6337636041508479, "grad_norm": 0.16310001909732819, "learning_rate": 1.0494226506009046e-05, "loss": 0.4821, "step": 6455 }, { "epoch": 1.6340167046317386, "grad_norm": 0.15945255756378174, "learning_rate": 1.0491835093737846e-05, "loss": 0.4702, "step": 6456 }, { "epoch": 1.6342698051126296, "grad_norm": 0.1639392226934433, "learning_rate": 1.0489443653270782e-05, "loss": 0.4594, "step": 6457 }, { "epoch": 1.6345229055935206, "grad_norm": 0.17947381734848022, "learning_rate": 1.0487052184744952e-05, "loss": 0.5073, "step": 6458 }, { "epoch": 1.6347760060744116, "grad_norm": 0.18437668681144714, "learning_rate": 1.0484660688297454e-05, "loss": 0.5011, "step": 6459 }, { "epoch": 1.6350291065553024, "grad_norm": 0.16972844302654266, "learning_rate": 1.0482269164065386e-05, "loss": 0.4952, "step": 6460 }, { "epoch": 1.6352822070361932, "grad_norm": 0.1658020317554474, "learning_rate": 1.047987761218585e-05, "loss": 0.4678, "step": 6461 }, { "epoch": 1.6355353075170842, "grad_norm": 0.17698204517364502, "learning_rate": 1.047748603279595e-05, "loss": 0.4509, "step": 6462 }, { "epoch": 1.6357884079979752, "grad_norm": 0.16558781266212463, "learning_rate": 1.0475094426032786e-05, "loss": 0.4913, "step": 6463 }, { "epoch": 1.6360415084788662, "grad_norm": 0.16313891112804413, "learning_rate": 1.0472702792033471e-05, "loss": 0.5032, "step": 6464 }, { "epoch": 1.636294608959757, "grad_norm": 0.16434034705162048, "learning_rate": 1.0470311130935102e-05, "loss": 0.4746, "step": 6465 }, { "epoch": 1.636547709440648, "grad_norm": 0.1669696718454361, "learning_rate": 1.0467919442874794e-05, "loss": 0.5146, "step": 6466 }, { "epoch": 1.6368008099215388, "grad_norm": 0.18842066824436188, "learning_rate": 1.0465527727989662e-05, "loss": 0.4765, "step": 6467 }, { "epoch": 1.6370539104024298, "grad_norm": 0.20375564694404602, "learning_rate": 1.0463135986416809e-05, "loss": 0.4862, "step": 6468 }, { "epoch": 1.6373070108833208, "grad_norm": 0.16720786690711975, "learning_rate": 1.0460744218293354e-05, "loss": 0.4944, "step": 6469 }, { "epoch": 1.6375601113642118, "grad_norm": 0.16929112374782562, "learning_rate": 1.0458352423756408e-05, "loss": 0.4542, "step": 6470 }, { "epoch": 1.6378132118451025, "grad_norm": 0.1578352302312851, "learning_rate": 1.0455960602943089e-05, "loss": 0.4754, "step": 6471 }, { "epoch": 1.6380663123259933, "grad_norm": 0.17077761888504028, "learning_rate": 1.0453568755990519e-05, "loss": 0.4791, "step": 6472 }, { "epoch": 1.6383194128068843, "grad_norm": 0.16899342834949493, "learning_rate": 1.0451176883035813e-05, "loss": 0.4826, "step": 6473 }, { "epoch": 1.6385725132877753, "grad_norm": 0.16910310089588165, "learning_rate": 1.0448784984216095e-05, "loss": 0.4916, "step": 6474 }, { "epoch": 1.6388256137686663, "grad_norm": 0.16285063326358795, "learning_rate": 1.0446393059668485e-05, "loss": 0.463, "step": 6475 }, { "epoch": 1.639078714249557, "grad_norm": 0.1642293930053711, "learning_rate": 1.0444001109530106e-05, "loss": 0.521, "step": 6476 }, { "epoch": 1.6393318147304479, "grad_norm": 0.16829738020896912, "learning_rate": 1.0441609133938089e-05, "loss": 0.4809, "step": 6477 }, { "epoch": 1.6395849152113389, "grad_norm": 0.16353902220726013, "learning_rate": 1.0439217133029553e-05, "loss": 0.4548, "step": 6478 }, { "epoch": 1.6398380156922299, "grad_norm": 0.16692467033863068, "learning_rate": 1.0436825106941633e-05, "loss": 0.5117, "step": 6479 }, { "epoch": 1.6400911161731209, "grad_norm": 0.16683128476142883, "learning_rate": 1.0434433055811455e-05, "loss": 0.4856, "step": 6480 }, { "epoch": 1.6403442166540116, "grad_norm": 0.1691824197769165, "learning_rate": 1.043204097977615e-05, "loss": 0.5043, "step": 6481 }, { "epoch": 1.6405973171349024, "grad_norm": 0.19450968503952026, "learning_rate": 1.0429648878972856e-05, "loss": 0.482, "step": 6482 }, { "epoch": 1.6408504176157934, "grad_norm": 0.1683594286441803, "learning_rate": 1.04272567535387e-05, "loss": 0.4966, "step": 6483 }, { "epoch": 1.6411035180966844, "grad_norm": 0.16466739773750305, "learning_rate": 1.042486460361082e-05, "loss": 0.4968, "step": 6484 }, { "epoch": 1.6413566185775754, "grad_norm": 0.16729380190372467, "learning_rate": 1.0422472429326356e-05, "loss": 0.5123, "step": 6485 }, { "epoch": 1.6416097190584662, "grad_norm": 0.16863501071929932, "learning_rate": 1.0420080230822445e-05, "loss": 0.5223, "step": 6486 }, { "epoch": 1.641862819539357, "grad_norm": 0.1724105030298233, "learning_rate": 1.0417688008236222e-05, "loss": 0.5012, "step": 6487 }, { "epoch": 1.642115920020248, "grad_norm": 0.167232945561409, "learning_rate": 1.0415295761704833e-05, "loss": 0.4959, "step": 6488 }, { "epoch": 1.642369020501139, "grad_norm": 0.1650419980287552, "learning_rate": 1.041290349136542e-05, "loss": 0.4652, "step": 6489 }, { "epoch": 1.64262212098203, "grad_norm": 0.18379326164722443, "learning_rate": 1.0410511197355122e-05, "loss": 0.4817, "step": 6490 }, { "epoch": 1.6428752214629208, "grad_norm": 0.1659395694732666, "learning_rate": 1.0408118879811092e-05, "loss": 0.4826, "step": 6491 }, { "epoch": 1.6431283219438115, "grad_norm": 0.15953634679317474, "learning_rate": 1.0405726538870471e-05, "loss": 0.4809, "step": 6492 }, { "epoch": 1.6433814224247025, "grad_norm": 0.20569534599781036, "learning_rate": 1.0403334174670405e-05, "loss": 0.4891, "step": 6493 }, { "epoch": 1.6436345229055935, "grad_norm": 0.16619640588760376, "learning_rate": 1.0400941787348054e-05, "loss": 0.5008, "step": 6494 }, { "epoch": 1.6438876233864845, "grad_norm": 0.16435833275318146, "learning_rate": 1.0398549377040555e-05, "loss": 0.489, "step": 6495 }, { "epoch": 1.6441407238673753, "grad_norm": 0.16698089241981506, "learning_rate": 1.0396156943885068e-05, "loss": 0.4706, "step": 6496 }, { "epoch": 1.6443938243482663, "grad_norm": 0.17071092128753662, "learning_rate": 1.0393764488018741e-05, "loss": 0.4654, "step": 6497 }, { "epoch": 1.644646924829157, "grad_norm": 0.16696478426456451, "learning_rate": 1.0391372009578733e-05, "loss": 0.4905, "step": 6498 }, { "epoch": 1.644900025310048, "grad_norm": 0.179615780711174, "learning_rate": 1.0388979508702197e-05, "loss": 0.5008, "step": 6499 }, { "epoch": 1.645153125790939, "grad_norm": 0.16045281291007996, "learning_rate": 1.038658698552629e-05, "loss": 0.4656, "step": 6500 }, { "epoch": 1.6454062262718299, "grad_norm": 0.16938720643520355, "learning_rate": 1.0384194440188174e-05, "loss": 0.5075, "step": 6501 }, { "epoch": 1.6456593267527209, "grad_norm": 0.1665305644273758, "learning_rate": 1.0381801872825004e-05, "loss": 0.4973, "step": 6502 }, { "epoch": 1.6459124272336116, "grad_norm": 0.16728119552135468, "learning_rate": 1.0379409283573943e-05, "loss": 0.4834, "step": 6503 }, { "epoch": 1.6461655277145026, "grad_norm": 0.16428104043006897, "learning_rate": 1.037701667257215e-05, "loss": 0.4652, "step": 6504 }, { "epoch": 1.6464186281953936, "grad_norm": 0.16495588421821594, "learning_rate": 1.0374624039956795e-05, "loss": 0.4908, "step": 6505 }, { "epoch": 1.6466717286762846, "grad_norm": 0.16018804907798767, "learning_rate": 1.0372231385865038e-05, "loss": 0.4673, "step": 6506 }, { "epoch": 1.6469248291571754, "grad_norm": 0.1677820086479187, "learning_rate": 1.0369838710434044e-05, "loss": 0.5147, "step": 6507 }, { "epoch": 1.6471779296380662, "grad_norm": 0.1697719246149063, "learning_rate": 1.0367446013800981e-05, "loss": 0.4757, "step": 6508 }, { "epoch": 1.6474310301189572, "grad_norm": 0.1626783013343811, "learning_rate": 1.0365053296103016e-05, "loss": 0.47, "step": 6509 }, { "epoch": 1.6476841305998482, "grad_norm": 0.16522379219532013, "learning_rate": 1.0362660557477322e-05, "loss": 0.4791, "step": 6510 }, { "epoch": 1.6479372310807392, "grad_norm": 0.1682816445827484, "learning_rate": 1.036026779806107e-05, "loss": 0.4812, "step": 6511 }, { "epoch": 1.64819033156163, "grad_norm": 0.16544701159000397, "learning_rate": 1.0357875017991423e-05, "loss": 0.5011, "step": 6512 }, { "epoch": 1.6484434320425208, "grad_norm": 0.16344676911830902, "learning_rate": 1.0355482217405563e-05, "loss": 0.4729, "step": 6513 }, { "epoch": 1.6486965325234118, "grad_norm": 0.1700427383184433, "learning_rate": 1.0353089396440665e-05, "loss": 0.4585, "step": 6514 }, { "epoch": 1.6489496330043028, "grad_norm": 0.17079134285449982, "learning_rate": 1.0350696555233896e-05, "loss": 0.5101, "step": 6515 }, { "epoch": 1.6492027334851938, "grad_norm": 0.17222771048545837, "learning_rate": 1.0348303693922444e-05, "loss": 0.4743, "step": 6516 }, { "epoch": 1.6494558339660845, "grad_norm": 0.16187003254890442, "learning_rate": 1.0345910812643473e-05, "loss": 0.4825, "step": 6517 }, { "epoch": 1.6497089344469753, "grad_norm": 0.16513465344905853, "learning_rate": 1.0343517911534172e-05, "loss": 0.47, "step": 6518 }, { "epoch": 1.6499620349278663, "grad_norm": 0.16086463630199432, "learning_rate": 1.034112499073172e-05, "loss": 0.4903, "step": 6519 }, { "epoch": 1.6502151354087573, "grad_norm": 0.16726012527942657, "learning_rate": 1.0338732050373292e-05, "loss": 0.4804, "step": 6520 }, { "epoch": 1.6504682358896483, "grad_norm": 0.1704609990119934, "learning_rate": 1.0336339090596075e-05, "loss": 0.4826, "step": 6521 }, { "epoch": 1.650721336370539, "grad_norm": 0.16430461406707764, "learning_rate": 1.0333946111537251e-05, "loss": 0.4887, "step": 6522 }, { "epoch": 1.6509744368514299, "grad_norm": 0.16909049451351166, "learning_rate": 1.0331553113334005e-05, "loss": 0.4982, "step": 6523 }, { "epoch": 1.6512275373323209, "grad_norm": 0.19604510068893433, "learning_rate": 1.0329160096123525e-05, "loss": 0.4935, "step": 6524 }, { "epoch": 1.6514806378132119, "grad_norm": 0.1673625260591507, "learning_rate": 1.0326767060042993e-05, "loss": 0.5114, "step": 6525 }, { "epoch": 1.6517337382941029, "grad_norm": 0.17060984671115875, "learning_rate": 1.0324374005229596e-05, "loss": 0.4759, "step": 6526 }, { "epoch": 1.6519868387749936, "grad_norm": 0.1661737710237503, "learning_rate": 1.032198093182053e-05, "loss": 0.476, "step": 6527 }, { "epoch": 1.6522399392558846, "grad_norm": 0.15979188680648804, "learning_rate": 1.0319587839952976e-05, "loss": 0.4786, "step": 6528 }, { "epoch": 1.6524930397367754, "grad_norm": 0.16349539160728455, "learning_rate": 1.0317194729764131e-05, "loss": 0.4746, "step": 6529 }, { "epoch": 1.6527461402176664, "grad_norm": 0.16427184641361237, "learning_rate": 1.031480160139119e-05, "loss": 0.4808, "step": 6530 }, { "epoch": 1.6529992406985574, "grad_norm": 0.16232997179031372, "learning_rate": 1.0312408454971332e-05, "loss": 0.4964, "step": 6531 }, { "epoch": 1.6532523411794482, "grad_norm": 0.17339912056922913, "learning_rate": 1.0310015290641764e-05, "loss": 0.4556, "step": 6532 }, { "epoch": 1.6535054416603392, "grad_norm": 0.16949467360973358, "learning_rate": 1.0307622108539679e-05, "loss": 0.4966, "step": 6533 }, { "epoch": 1.65375854214123, "grad_norm": 0.16865158081054688, "learning_rate": 1.0305228908802267e-05, "loss": 0.4883, "step": 6534 }, { "epoch": 1.654011642622121, "grad_norm": 0.1649271547794342, "learning_rate": 1.0302835691566731e-05, "loss": 0.4656, "step": 6535 }, { "epoch": 1.654264743103012, "grad_norm": 0.1638229638338089, "learning_rate": 1.030044245697027e-05, "loss": 0.4875, "step": 6536 }, { "epoch": 1.654517843583903, "grad_norm": 0.1700860559940338, "learning_rate": 1.0298049205150073e-05, "loss": 0.4565, "step": 6537 }, { "epoch": 1.6547709440647937, "grad_norm": 0.16411447525024414, "learning_rate": 1.0295655936243354e-05, "loss": 0.4755, "step": 6538 }, { "epoch": 1.6550240445456845, "grad_norm": 0.16787472367286682, "learning_rate": 1.0293262650387306e-05, "loss": 0.4814, "step": 6539 }, { "epoch": 1.6552771450265755, "grad_norm": 0.16860461235046387, "learning_rate": 1.0290869347719133e-05, "loss": 0.4954, "step": 6540 }, { "epoch": 1.6555302455074665, "grad_norm": 0.16954800486564636, "learning_rate": 1.0288476028376035e-05, "loss": 0.4883, "step": 6541 }, { "epoch": 1.6557833459883575, "grad_norm": 0.15975667536258698, "learning_rate": 1.0286082692495219e-05, "loss": 0.4939, "step": 6542 }, { "epoch": 1.6560364464692483, "grad_norm": 0.1712201088666916, "learning_rate": 1.028368934021389e-05, "loss": 0.5035, "step": 6543 }, { "epoch": 1.656289546950139, "grad_norm": 0.16285812854766846, "learning_rate": 1.0281295971669252e-05, "loss": 0.4952, "step": 6544 }, { "epoch": 1.65654264743103, "grad_norm": 0.1656414121389389, "learning_rate": 1.0278902586998511e-05, "loss": 0.4898, "step": 6545 }, { "epoch": 1.656795747911921, "grad_norm": 0.16474592685699463, "learning_rate": 1.0276509186338878e-05, "loss": 0.4722, "step": 6546 }, { "epoch": 1.657048848392812, "grad_norm": 0.17329148948192596, "learning_rate": 1.027411576982756e-05, "loss": 0.4746, "step": 6547 }, { "epoch": 1.6573019488737029, "grad_norm": 0.17137201130390167, "learning_rate": 1.0271722337601766e-05, "loss": 0.4739, "step": 6548 }, { "epoch": 1.6575550493545936, "grad_norm": 0.16153445839881897, "learning_rate": 1.0269328889798709e-05, "loss": 0.5036, "step": 6549 }, { "epoch": 1.6578081498354846, "grad_norm": 0.1685321033000946, "learning_rate": 1.0266935426555595e-05, "loss": 0.4854, "step": 6550 }, { "epoch": 1.6580612503163756, "grad_norm": 0.16567754745483398, "learning_rate": 1.026454194800964e-05, "loss": 0.4903, "step": 6551 }, { "epoch": 1.6583143507972666, "grad_norm": 0.16636590659618378, "learning_rate": 1.0262148454298058e-05, "loss": 0.4689, "step": 6552 }, { "epoch": 1.6585674512781574, "grad_norm": 0.16946977376937866, "learning_rate": 1.0259754945558058e-05, "loss": 0.5072, "step": 6553 }, { "epoch": 1.6588205517590482, "grad_norm": 0.16598814725875854, "learning_rate": 1.025736142192686e-05, "loss": 0.4978, "step": 6554 }, { "epoch": 1.6590736522399392, "grad_norm": 0.16429299116134644, "learning_rate": 1.025496788354168e-05, "loss": 0.4818, "step": 6555 }, { "epoch": 1.6593267527208302, "grad_norm": 0.1641968935728073, "learning_rate": 1.0252574330539727e-05, "loss": 0.4753, "step": 6556 }, { "epoch": 1.6595798532017212, "grad_norm": 0.17046082019805908, "learning_rate": 1.0250180763058225e-05, "loss": 0.4885, "step": 6557 }, { "epoch": 1.659832953682612, "grad_norm": 0.1681009829044342, "learning_rate": 1.0247787181234395e-05, "loss": 0.5032, "step": 6558 }, { "epoch": 1.6600860541635027, "grad_norm": 0.16540563106536865, "learning_rate": 1.0245393585205445e-05, "loss": 0.466, "step": 6559 }, { "epoch": 1.6603391546443937, "grad_norm": 0.16385814547538757, "learning_rate": 1.0242999975108605e-05, "loss": 0.4717, "step": 6560 }, { "epoch": 1.6605922551252847, "grad_norm": 0.16431789100170135, "learning_rate": 1.0240606351081092e-05, "loss": 0.4581, "step": 6561 }, { "epoch": 1.6608453556061757, "grad_norm": 0.1776125282049179, "learning_rate": 1.0238212713260127e-05, "loss": 0.4924, "step": 6562 }, { "epoch": 1.6610984560870665, "grad_norm": 0.16428431868553162, "learning_rate": 1.023581906178293e-05, "loss": 0.4823, "step": 6563 }, { "epoch": 1.6613515565679575, "grad_norm": 0.18001788854599, "learning_rate": 1.0233425396786729e-05, "loss": 0.494, "step": 6564 }, { "epoch": 1.6616046570488483, "grad_norm": 0.16217070817947388, "learning_rate": 1.0231031718408747e-05, "loss": 0.461, "step": 6565 }, { "epoch": 1.6618577575297393, "grad_norm": 0.16861163079738617, "learning_rate": 1.0228638026786202e-05, "loss": 0.4686, "step": 6566 }, { "epoch": 1.6621108580106303, "grad_norm": 0.16440781950950623, "learning_rate": 1.0226244322056327e-05, "loss": 0.4904, "step": 6567 }, { "epoch": 1.6623639584915213, "grad_norm": 0.17058250308036804, "learning_rate": 1.0223850604356343e-05, "loss": 0.4816, "step": 6568 }, { "epoch": 1.662617058972412, "grad_norm": 0.16382546722888947, "learning_rate": 1.0221456873823481e-05, "loss": 0.4954, "step": 6569 }, { "epoch": 1.6628701594533029, "grad_norm": 0.16671383380889893, "learning_rate": 1.0219063130594964e-05, "loss": 0.4776, "step": 6570 }, { "epoch": 1.6631232599341939, "grad_norm": 0.16534936428070068, "learning_rate": 1.0216669374808024e-05, "loss": 0.482, "step": 6571 }, { "epoch": 1.6633763604150849, "grad_norm": 0.164045050740242, "learning_rate": 1.0214275606599885e-05, "loss": 0.4568, "step": 6572 }, { "epoch": 1.6636294608959759, "grad_norm": 0.16754242777824402, "learning_rate": 1.0211881826107784e-05, "loss": 0.4813, "step": 6573 }, { "epoch": 1.6638825613768666, "grad_norm": 0.1737896054983139, "learning_rate": 1.0209488033468947e-05, "loss": 0.4917, "step": 6574 }, { "epoch": 1.6641356618577574, "grad_norm": 0.1776164323091507, "learning_rate": 1.0207094228820604e-05, "loss": 0.4756, "step": 6575 }, { "epoch": 1.6643887623386484, "grad_norm": 0.16487328708171844, "learning_rate": 1.0204700412299988e-05, "loss": 0.489, "step": 6576 }, { "epoch": 1.6646418628195394, "grad_norm": 0.16539083421230316, "learning_rate": 1.0202306584044335e-05, "loss": 0.497, "step": 6577 }, { "epoch": 1.6648949633004304, "grad_norm": 0.17299501597881317, "learning_rate": 1.019991274419087e-05, "loss": 0.4787, "step": 6578 }, { "epoch": 1.6651480637813212, "grad_norm": 0.1646791398525238, "learning_rate": 1.0197518892876831e-05, "loss": 0.4875, "step": 6579 }, { "epoch": 1.665401164262212, "grad_norm": 0.1672816425561905, "learning_rate": 1.0195125030239459e-05, "loss": 0.4879, "step": 6580 }, { "epoch": 1.665654264743103, "grad_norm": 0.16498036682605743, "learning_rate": 1.0192731156415979e-05, "loss": 0.47, "step": 6581 }, { "epoch": 1.665907365223994, "grad_norm": 0.16670481860637665, "learning_rate": 1.0190337271543627e-05, "loss": 0.5096, "step": 6582 }, { "epoch": 1.666160465704885, "grad_norm": 0.16463348269462585, "learning_rate": 1.018794337575965e-05, "loss": 0.4783, "step": 6583 }, { "epoch": 1.6664135661857757, "grad_norm": 0.16325923800468445, "learning_rate": 1.0185549469201275e-05, "loss": 0.4431, "step": 6584 }, { "epoch": 1.6666666666666665, "grad_norm": 0.16708500683307648, "learning_rate": 1.0183155552005739e-05, "loss": 0.4911, "step": 6585 }, { "epoch": 1.6669197671475575, "grad_norm": 0.16956649720668793, "learning_rate": 1.018076162431029e-05, "loss": 0.4696, "step": 6586 }, { "epoch": 1.6671728676284485, "grad_norm": 0.16520076990127563, "learning_rate": 1.0178367686252155e-05, "loss": 0.4735, "step": 6587 }, { "epoch": 1.6674259681093395, "grad_norm": 0.1658865511417389, "learning_rate": 1.017597373796858e-05, "loss": 0.4772, "step": 6588 }, { "epoch": 1.6676790685902303, "grad_norm": 0.16315299272537231, "learning_rate": 1.0173579779596807e-05, "loss": 0.4827, "step": 6589 }, { "epoch": 1.667932169071121, "grad_norm": 0.1656765192747116, "learning_rate": 1.0171185811274071e-05, "loss": 0.4813, "step": 6590 }, { "epoch": 1.668185269552012, "grad_norm": 0.15856415033340454, "learning_rate": 1.0168791833137616e-05, "loss": 0.483, "step": 6591 }, { "epoch": 1.668438370032903, "grad_norm": 0.16418509185314178, "learning_rate": 1.0166397845324685e-05, "loss": 0.4834, "step": 6592 }, { "epoch": 1.668691470513794, "grad_norm": 0.16905026137828827, "learning_rate": 1.0164003847972516e-05, "loss": 0.4797, "step": 6593 }, { "epoch": 1.6689445709946849, "grad_norm": 0.16234169900417328, "learning_rate": 1.0161609841218356e-05, "loss": 0.4563, "step": 6594 }, { "epoch": 1.6691976714755759, "grad_norm": 0.16777168214321136, "learning_rate": 1.0159215825199445e-05, "loss": 0.4372, "step": 6595 }, { "epoch": 1.6694507719564666, "grad_norm": 0.16247735917568207, "learning_rate": 1.0156821800053029e-05, "loss": 0.4656, "step": 6596 }, { "epoch": 1.6697038724373576, "grad_norm": 0.16702060401439667, "learning_rate": 1.0154427765916352e-05, "loss": 0.5, "step": 6597 }, { "epoch": 1.6699569729182486, "grad_norm": 0.16535183787345886, "learning_rate": 1.015203372292666e-05, "loss": 0.4919, "step": 6598 }, { "epoch": 1.6702100733991394, "grad_norm": 0.1639549434185028, "learning_rate": 1.0149639671221195e-05, "loss": 0.5003, "step": 6599 }, { "epoch": 1.6704631738800304, "grad_norm": 0.16373208165168762, "learning_rate": 1.0147245610937206e-05, "loss": 0.4556, "step": 6600 }, { "epoch": 1.6707162743609212, "grad_norm": 0.1701332926750183, "learning_rate": 1.0144851542211937e-05, "loss": 0.5019, "step": 6601 }, { "epoch": 1.6709693748418122, "grad_norm": 0.16678565740585327, "learning_rate": 1.0142457465182643e-05, "loss": 0.49, "step": 6602 }, { "epoch": 1.6712224753227032, "grad_norm": 0.1728842705488205, "learning_rate": 1.0140063379986558e-05, "loss": 0.5021, "step": 6603 }, { "epoch": 1.6714755758035942, "grad_norm": 0.1666422337293625, "learning_rate": 1.0137669286760936e-05, "loss": 0.494, "step": 6604 }, { "epoch": 1.671728676284485, "grad_norm": 0.16603250801563263, "learning_rate": 1.013527518564303e-05, "loss": 0.4662, "step": 6605 }, { "epoch": 1.6719817767653757, "grad_norm": 0.1662132441997528, "learning_rate": 1.0132881076770083e-05, "loss": 0.5011, "step": 6606 }, { "epoch": 1.6722348772462667, "grad_norm": 0.16920559108257294, "learning_rate": 1.0130486960279342e-05, "loss": 0.4917, "step": 6607 }, { "epoch": 1.6724879777271577, "grad_norm": 0.16269411146640778, "learning_rate": 1.0128092836308067e-05, "loss": 0.4532, "step": 6608 }, { "epoch": 1.6727410782080487, "grad_norm": 0.18413688242435455, "learning_rate": 1.01256987049935e-05, "loss": 0.4855, "step": 6609 }, { "epoch": 1.6729941786889395, "grad_norm": 0.16375282406806946, "learning_rate": 1.0123304566472887e-05, "loss": 0.4727, "step": 6610 }, { "epoch": 1.6732472791698303, "grad_norm": 0.1742761731147766, "learning_rate": 1.012091042088349e-05, "loss": 0.4708, "step": 6611 }, { "epoch": 1.6735003796507213, "grad_norm": 0.16869811713695526, "learning_rate": 1.0118516268362555e-05, "loss": 0.4755, "step": 6612 }, { "epoch": 1.6737534801316123, "grad_norm": 0.16262778639793396, "learning_rate": 1.0116122109047333e-05, "loss": 0.4568, "step": 6613 }, { "epoch": 1.6740065806125033, "grad_norm": 0.16283927857875824, "learning_rate": 1.0113727943075076e-05, "loss": 0.455, "step": 6614 }, { "epoch": 1.674259681093394, "grad_norm": 0.1654614359140396, "learning_rate": 1.0111333770583036e-05, "loss": 0.4687, "step": 6615 }, { "epoch": 1.6745127815742848, "grad_norm": 0.16387887299060822, "learning_rate": 1.010893959170847e-05, "loss": 0.4668, "step": 6616 }, { "epoch": 1.6747658820551758, "grad_norm": 0.16168934106826782, "learning_rate": 1.0106545406588626e-05, "loss": 0.4874, "step": 6617 }, { "epoch": 1.6750189825360668, "grad_norm": 0.16694727540016174, "learning_rate": 1.0104151215360761e-05, "loss": 0.4816, "step": 6618 }, { "epoch": 1.6752720830169578, "grad_norm": 0.16928888857364655, "learning_rate": 1.0101757018162129e-05, "loss": 0.467, "step": 6619 }, { "epoch": 1.6755251834978486, "grad_norm": 0.16537810862064362, "learning_rate": 1.0099362815129981e-05, "loss": 0.4551, "step": 6620 }, { "epoch": 1.6757782839787394, "grad_norm": 0.16906258463859558, "learning_rate": 1.0096968606401574e-05, "loss": 0.4704, "step": 6621 }, { "epoch": 1.6760313844596304, "grad_norm": 0.16235089302062988, "learning_rate": 1.0094574392114162e-05, "loss": 0.472, "step": 6622 }, { "epoch": 1.6762844849405214, "grad_norm": 0.16621248424053192, "learning_rate": 1.0092180172405001e-05, "loss": 0.4948, "step": 6623 }, { "epoch": 1.6765375854214124, "grad_norm": 0.17018724977970123, "learning_rate": 1.0089785947411346e-05, "loss": 0.4991, "step": 6624 }, { "epoch": 1.6767906859023032, "grad_norm": 0.1670583188533783, "learning_rate": 1.0087391717270456e-05, "loss": 0.4993, "step": 6625 }, { "epoch": 1.6770437863831942, "grad_norm": 0.17157302796840668, "learning_rate": 1.008499748211958e-05, "loss": 0.5029, "step": 6626 }, { "epoch": 1.677296886864085, "grad_norm": 0.17077776789665222, "learning_rate": 1.0082603242095983e-05, "loss": 0.5014, "step": 6627 }, { "epoch": 1.677549987344976, "grad_norm": 0.16426844894886017, "learning_rate": 1.0080208997336915e-05, "loss": 0.4861, "step": 6628 }, { "epoch": 1.677803087825867, "grad_norm": 0.16428504884243011, "learning_rate": 1.0077814747979632e-05, "loss": 0.4711, "step": 6629 }, { "epoch": 1.6780561883067577, "grad_norm": 0.16575239598751068, "learning_rate": 1.0075420494161401e-05, "loss": 0.4791, "step": 6630 }, { "epoch": 1.6783092887876487, "grad_norm": 0.18052203953266144, "learning_rate": 1.0073026236019469e-05, "loss": 0.4659, "step": 6631 }, { "epoch": 1.6785623892685395, "grad_norm": 0.1708793342113495, "learning_rate": 1.0070631973691096e-05, "loss": 0.4857, "step": 6632 }, { "epoch": 1.6788154897494305, "grad_norm": 0.16819846630096436, "learning_rate": 1.0068237707313549e-05, "loss": 0.49, "step": 6633 }, { "epoch": 1.6790685902303215, "grad_norm": 0.16096514463424683, "learning_rate": 1.0065843437024073e-05, "loss": 0.4786, "step": 6634 }, { "epoch": 1.6793216907112125, "grad_norm": 0.16638940572738647, "learning_rate": 1.006344916295993e-05, "loss": 0.4808, "step": 6635 }, { "epoch": 1.6795747911921033, "grad_norm": 0.16440163552761078, "learning_rate": 1.0061054885258387e-05, "loss": 0.461, "step": 6636 }, { "epoch": 1.679827891672994, "grad_norm": 0.1671404391527176, "learning_rate": 1.0058660604056696e-05, "loss": 0.4832, "step": 6637 }, { "epoch": 1.680080992153885, "grad_norm": 0.16705477237701416, "learning_rate": 1.0056266319492116e-05, "loss": 0.501, "step": 6638 }, { "epoch": 1.680334092634776, "grad_norm": 0.16367262601852417, "learning_rate": 1.0053872031701908e-05, "loss": 0.4706, "step": 6639 }, { "epoch": 1.680587193115667, "grad_norm": 0.16617998480796814, "learning_rate": 1.0051477740823329e-05, "loss": 0.4813, "step": 6640 }, { "epoch": 1.6808402935965578, "grad_norm": 0.1652643233537674, "learning_rate": 1.0049083446993642e-05, "loss": 0.4901, "step": 6641 }, { "epoch": 1.6810933940774486, "grad_norm": 0.16600656509399414, "learning_rate": 1.0046689150350105e-05, "loss": 0.4898, "step": 6642 }, { "epoch": 1.6813464945583396, "grad_norm": 0.16219359636306763, "learning_rate": 1.0044294851029977e-05, "loss": 0.4828, "step": 6643 }, { "epoch": 1.6815995950392306, "grad_norm": 0.1676824688911438, "learning_rate": 1.0041900549170518e-05, "loss": 0.4887, "step": 6644 }, { "epoch": 1.6818526955201216, "grad_norm": 0.16331003606319427, "learning_rate": 1.0039506244908991e-05, "loss": 0.4928, "step": 6645 }, { "epoch": 1.6821057960010124, "grad_norm": 0.1689000427722931, "learning_rate": 1.0037111938382655e-05, "loss": 0.4753, "step": 6646 }, { "epoch": 1.6823588964819032, "grad_norm": 0.16601362824440002, "learning_rate": 1.0034717629728771e-05, "loss": 0.4862, "step": 6647 }, { "epoch": 1.6826119969627942, "grad_norm": 0.15860538184642792, "learning_rate": 1.0032323319084596e-05, "loss": 0.4815, "step": 6648 }, { "epoch": 1.6828650974436852, "grad_norm": 0.20804719626903534, "learning_rate": 1.0029929006587395e-05, "loss": 0.4603, "step": 6649 }, { "epoch": 1.6831181979245762, "grad_norm": 0.17256712913513184, "learning_rate": 1.0027534692374433e-05, "loss": 0.4897, "step": 6650 }, { "epoch": 1.683371298405467, "grad_norm": 0.1653810441493988, "learning_rate": 1.0025140376582955e-05, "loss": 0.4846, "step": 6651 }, { "epoch": 1.6836243988863577, "grad_norm": 0.16640476882457733, "learning_rate": 1.002274605935024e-05, "loss": 0.4873, "step": 6652 }, { "epoch": 1.6838774993672487, "grad_norm": 0.1601630449295044, "learning_rate": 1.0020351740813541e-05, "loss": 0.4707, "step": 6653 }, { "epoch": 1.6841305998481397, "grad_norm": 0.17152933776378632, "learning_rate": 1.0017957421110118e-05, "loss": 0.4747, "step": 6654 }, { "epoch": 1.6843837003290307, "grad_norm": 0.16990770399570465, "learning_rate": 1.0015563100377236e-05, "loss": 0.4737, "step": 6655 }, { "epoch": 1.6846368008099215, "grad_norm": 0.16354316473007202, "learning_rate": 1.0013168778752154e-05, "loss": 0.4943, "step": 6656 }, { "epoch": 1.6848899012908123, "grad_norm": 0.1694018691778183, "learning_rate": 1.0010774456372133e-05, "loss": 0.4779, "step": 6657 }, { "epoch": 1.6851430017717033, "grad_norm": 0.16853436827659607, "learning_rate": 1.0008380133374436e-05, "loss": 0.4811, "step": 6658 }, { "epoch": 1.6853961022525943, "grad_norm": 0.16283589601516724, "learning_rate": 1.0005985809896322e-05, "loss": 0.4667, "step": 6659 }, { "epoch": 1.6856492027334853, "grad_norm": 0.16645179688930511, "learning_rate": 1.0003591486075055e-05, "loss": 0.4812, "step": 6660 }, { "epoch": 1.685902303214376, "grad_norm": 0.165795236825943, "learning_rate": 1.0001197162047896e-05, "loss": 0.4935, "step": 6661 }, { "epoch": 1.686155403695267, "grad_norm": 0.15969063341617584, "learning_rate": 9.998802837952109e-06, "loss": 0.4555, "step": 6662 }, { "epoch": 1.6864085041761578, "grad_norm": 0.1645396202802658, "learning_rate": 9.996408513924949e-06, "loss": 0.5003, "step": 6663 }, { "epoch": 1.6866616046570488, "grad_norm": 0.16735287010669708, "learning_rate": 9.994014190103681e-06, "loss": 0.4992, "step": 6664 }, { "epoch": 1.6869147051379398, "grad_norm": 0.17049001157283783, "learning_rate": 9.991619866625568e-06, "loss": 0.5034, "step": 6665 }, { "epoch": 1.6871678056188308, "grad_norm": 0.1666191816329956, "learning_rate": 9.98922554362787e-06, "loss": 0.4757, "step": 6666 }, { "epoch": 1.6874209060997216, "grad_norm": 0.1680452525615692, "learning_rate": 9.986831221247847e-06, "loss": 0.4767, "step": 6667 }, { "epoch": 1.6876740065806124, "grad_norm": 0.17063866555690765, "learning_rate": 9.984436899622769e-06, "loss": 0.4794, "step": 6668 }, { "epoch": 1.6879271070615034, "grad_norm": 0.16262304782867432, "learning_rate": 9.982042578889886e-06, "loss": 0.486, "step": 6669 }, { "epoch": 1.6881802075423944, "grad_norm": 0.16497191786766052, "learning_rate": 9.97964825918646e-06, "loss": 0.468, "step": 6670 }, { "epoch": 1.6884333080232854, "grad_norm": 0.16641773283481598, "learning_rate": 9.977253940649761e-06, "loss": 0.4691, "step": 6671 }, { "epoch": 1.6886864085041762, "grad_norm": 0.16768650710582733, "learning_rate": 9.974859623417043e-06, "loss": 0.4842, "step": 6672 }, { "epoch": 1.688939508985067, "grad_norm": 0.19884181022644043, "learning_rate": 9.972465307625574e-06, "loss": 0.5025, "step": 6673 }, { "epoch": 1.689192609465958, "grad_norm": 0.16546206176280975, "learning_rate": 9.970070993412606e-06, "loss": 0.4897, "step": 6674 }, { "epoch": 1.689445709946849, "grad_norm": 0.16708070039749146, "learning_rate": 9.967676680915406e-06, "loss": 0.4718, "step": 6675 }, { "epoch": 1.68969881042774, "grad_norm": 0.16563139855861664, "learning_rate": 9.96528237027123e-06, "loss": 0.4905, "step": 6676 }, { "epoch": 1.6899519109086307, "grad_norm": 0.17121510207653046, "learning_rate": 9.962888061617347e-06, "loss": 0.4502, "step": 6677 }, { "epoch": 1.6902050113895215, "grad_norm": 0.1641940027475357, "learning_rate": 9.96049375509101e-06, "loss": 0.4545, "step": 6678 }, { "epoch": 1.6904581118704125, "grad_norm": 0.16728518903255463, "learning_rate": 9.958099450829484e-06, "loss": 0.4821, "step": 6679 }, { "epoch": 1.6907112123513035, "grad_norm": 0.16693606972694397, "learning_rate": 9.955705148970026e-06, "loss": 0.4869, "step": 6680 }, { "epoch": 1.6909643128321945, "grad_norm": 0.16625340282917023, "learning_rate": 9.953310849649899e-06, "loss": 0.4975, "step": 6681 }, { "epoch": 1.6912174133130853, "grad_norm": 0.16272206604480743, "learning_rate": 9.950916553006361e-06, "loss": 0.4596, "step": 6682 }, { "epoch": 1.691470513793976, "grad_norm": 0.1606796532869339, "learning_rate": 9.948522259176673e-06, "loss": 0.5073, "step": 6683 }, { "epoch": 1.691723614274867, "grad_norm": 0.16920018196105957, "learning_rate": 9.946127968298097e-06, "loss": 0.4679, "step": 6684 }, { "epoch": 1.691976714755758, "grad_norm": 0.1787392795085907, "learning_rate": 9.943733680507887e-06, "loss": 0.4656, "step": 6685 }, { "epoch": 1.692229815236649, "grad_norm": 0.16333836317062378, "learning_rate": 9.941339395943306e-06, "loss": 0.4518, "step": 6686 }, { "epoch": 1.6924829157175398, "grad_norm": 0.1635415256023407, "learning_rate": 9.938945114741614e-06, "loss": 0.5318, "step": 6687 }, { "epoch": 1.6927360161984306, "grad_norm": 0.1791018694639206, "learning_rate": 9.936550837040068e-06, "loss": 0.4767, "step": 6688 }, { "epoch": 1.6929891166793216, "grad_norm": 0.16817252337932587, "learning_rate": 9.934156562975927e-06, "loss": 0.4886, "step": 6689 }, { "epoch": 1.6932422171602126, "grad_norm": 0.16615697741508484, "learning_rate": 9.931762292686458e-06, "loss": 0.4961, "step": 6690 }, { "epoch": 1.6934953176411036, "grad_norm": 0.17466376721858978, "learning_rate": 9.929368026308905e-06, "loss": 0.5004, "step": 6691 }, { "epoch": 1.6937484181219944, "grad_norm": 0.20411403477191925, "learning_rate": 9.926973763980533e-06, "loss": 0.4968, "step": 6692 }, { "epoch": 1.6940015186028854, "grad_norm": 0.17095451056957245, "learning_rate": 9.924579505838602e-06, "loss": 0.4848, "step": 6693 }, { "epoch": 1.6942546190837762, "grad_norm": 0.16433238983154297, "learning_rate": 9.922185252020368e-06, "loss": 0.4653, "step": 6694 }, { "epoch": 1.6945077195646672, "grad_norm": 0.17238998413085938, "learning_rate": 9.91979100266309e-06, "loss": 0.4698, "step": 6695 }, { "epoch": 1.6947608200455582, "grad_norm": 0.16616874933242798, "learning_rate": 9.917396757904022e-06, "loss": 0.4691, "step": 6696 }, { "epoch": 1.695013920526449, "grad_norm": 0.17066103219985962, "learning_rate": 9.915002517880423e-06, "loss": 0.4826, "step": 6697 }, { "epoch": 1.69526702100734, "grad_norm": 0.1687217801809311, "learning_rate": 9.912608282729546e-06, "loss": 0.4993, "step": 6698 }, { "epoch": 1.6955201214882307, "grad_norm": 0.17679457366466522, "learning_rate": 9.910214052588654e-06, "loss": 0.4663, "step": 6699 }, { "epoch": 1.6957732219691217, "grad_norm": 0.16550226509571075, "learning_rate": 9.907819827595e-06, "loss": 0.5054, "step": 6700 }, { "epoch": 1.6960263224500127, "grad_norm": 0.16564461588859558, "learning_rate": 9.905425607885841e-06, "loss": 0.4875, "step": 6701 }, { "epoch": 1.6962794229309037, "grad_norm": 0.16371895372867584, "learning_rate": 9.90303139359843e-06, "loss": 0.4851, "step": 6702 }, { "epoch": 1.6965325234117945, "grad_norm": 0.16883070766925812, "learning_rate": 9.900637184870024e-06, "loss": 0.4912, "step": 6703 }, { "epoch": 1.6967856238926853, "grad_norm": 0.16586099565029144, "learning_rate": 9.898242981837876e-06, "loss": 0.4945, "step": 6704 }, { "epoch": 1.6970387243735763, "grad_norm": 0.1616733968257904, "learning_rate": 9.895848784639239e-06, "loss": 0.4747, "step": 6705 }, { "epoch": 1.6972918248544673, "grad_norm": 0.16851146519184113, "learning_rate": 9.89345459341138e-06, "loss": 0.4561, "step": 6706 }, { "epoch": 1.6975449253353583, "grad_norm": 0.1606042981147766, "learning_rate": 9.891060408291534e-06, "loss": 0.4571, "step": 6707 }, { "epoch": 1.697798025816249, "grad_norm": 0.16802017390727997, "learning_rate": 9.888666229416967e-06, "loss": 0.4674, "step": 6708 }, { "epoch": 1.6980511262971398, "grad_norm": 0.16574536263942719, "learning_rate": 9.886272056924928e-06, "loss": 0.4877, "step": 6709 }, { "epoch": 1.6983042267780308, "grad_norm": 0.1648353785276413, "learning_rate": 9.883877890952672e-06, "loss": 0.4814, "step": 6710 }, { "epoch": 1.6985573272589218, "grad_norm": 0.1752125322818756, "learning_rate": 9.881483731637446e-06, "loss": 0.4974, "step": 6711 }, { "epoch": 1.6988104277398128, "grad_norm": 0.17129115760326385, "learning_rate": 9.879089579116514e-06, "loss": 0.4878, "step": 6712 }, { "epoch": 1.6990635282207036, "grad_norm": 0.2325967401266098, "learning_rate": 9.876695433527117e-06, "loss": 0.4752, "step": 6713 }, { "epoch": 1.6993166287015944, "grad_norm": 0.1652916967868805, "learning_rate": 9.874301295006506e-06, "loss": 0.501, "step": 6714 }, { "epoch": 1.6995697291824854, "grad_norm": 0.17356817424297333, "learning_rate": 9.871907163691936e-06, "loss": 0.466, "step": 6715 }, { "epoch": 1.6998228296633764, "grad_norm": 0.16770945489406586, "learning_rate": 9.869513039720658e-06, "loss": 0.4864, "step": 6716 }, { "epoch": 1.7000759301442674, "grad_norm": 0.1693647801876068, "learning_rate": 9.867118923229922e-06, "loss": 0.4838, "step": 6717 }, { "epoch": 1.7003290306251582, "grad_norm": 0.16800692677497864, "learning_rate": 9.864724814356975e-06, "loss": 0.4843, "step": 6718 }, { "epoch": 1.700582131106049, "grad_norm": 0.16143889725208282, "learning_rate": 9.862330713239066e-06, "loss": 0.4669, "step": 6719 }, { "epoch": 1.70083523158694, "grad_norm": 0.16857996582984924, "learning_rate": 9.859936620013445e-06, "loss": 0.4699, "step": 6720 }, { "epoch": 1.701088332067831, "grad_norm": 0.17103347182273865, "learning_rate": 9.857542534817362e-06, "loss": 0.4933, "step": 6721 }, { "epoch": 1.701341432548722, "grad_norm": 0.16735640168190002, "learning_rate": 9.855148457788061e-06, "loss": 0.4731, "step": 6722 }, { "epoch": 1.7015945330296127, "grad_norm": 0.16397055983543396, "learning_rate": 9.852754389062796e-06, "loss": 0.4594, "step": 6723 }, { "epoch": 1.7018476335105037, "grad_norm": 0.16474549472332, "learning_rate": 9.850360328778807e-06, "loss": 0.4872, "step": 6724 }, { "epoch": 1.7021007339913945, "grad_norm": 0.16547179222106934, "learning_rate": 9.847966277073344e-06, "loss": 0.4927, "step": 6725 }, { "epoch": 1.7023538344722855, "grad_norm": 0.16945572197437286, "learning_rate": 9.845572234083651e-06, "loss": 0.4906, "step": 6726 }, { "epoch": 1.7026069349531765, "grad_norm": 0.16430552303791046, "learning_rate": 9.843178199946971e-06, "loss": 0.4726, "step": 6727 }, { "epoch": 1.7028600354340673, "grad_norm": 0.16466625034809113, "learning_rate": 9.840784174800557e-06, "loss": 0.4918, "step": 6728 }, { "epoch": 1.7031131359149583, "grad_norm": 0.17729248106479645, "learning_rate": 9.838390158781649e-06, "loss": 0.516, "step": 6729 }, { "epoch": 1.703366236395849, "grad_norm": 0.1601092368364334, "learning_rate": 9.835996152027487e-06, "loss": 0.4639, "step": 6730 }, { "epoch": 1.70361933687674, "grad_norm": 0.19843044877052307, "learning_rate": 9.833602154675318e-06, "loss": 0.4784, "step": 6731 }, { "epoch": 1.703872437357631, "grad_norm": 0.18928463757038116, "learning_rate": 9.831208166862386e-06, "loss": 0.4856, "step": 6732 }, { "epoch": 1.704125537838522, "grad_norm": 0.16349957883358002, "learning_rate": 9.828814188725932e-06, "loss": 0.4765, "step": 6733 }, { "epoch": 1.7043786383194128, "grad_norm": 0.16658152639865875, "learning_rate": 9.826420220403198e-06, "loss": 0.4465, "step": 6734 }, { "epoch": 1.7046317388003036, "grad_norm": 0.16773591935634613, "learning_rate": 9.824026262031423e-06, "loss": 0.4596, "step": 6735 }, { "epoch": 1.7048848392811946, "grad_norm": 0.16384100914001465, "learning_rate": 9.821632313747846e-06, "loss": 0.4736, "step": 6736 }, { "epoch": 1.7051379397620856, "grad_norm": 0.1736845225095749, "learning_rate": 9.819238375689715e-06, "loss": 0.5051, "step": 6737 }, { "epoch": 1.7053910402429766, "grad_norm": 0.16284964978694916, "learning_rate": 9.816844447994261e-06, "loss": 0.458, "step": 6738 }, { "epoch": 1.7056441407238674, "grad_norm": 0.16199667751789093, "learning_rate": 9.81445053079873e-06, "loss": 0.4715, "step": 6739 }, { "epoch": 1.7058972412047582, "grad_norm": 0.16989412903785706, "learning_rate": 9.812056624240356e-06, "loss": 0.4973, "step": 6740 }, { "epoch": 1.7061503416856492, "grad_norm": 0.16839297115802765, "learning_rate": 9.809662728456374e-06, "loss": 0.4864, "step": 6741 }, { "epoch": 1.7064034421665402, "grad_norm": 0.16431137919425964, "learning_rate": 9.807268843584025e-06, "loss": 0.476, "step": 6742 }, { "epoch": 1.7066565426474312, "grad_norm": 0.1692715436220169, "learning_rate": 9.804874969760544e-06, "loss": 0.496, "step": 6743 }, { "epoch": 1.706909643128322, "grad_norm": 0.16611900925636292, "learning_rate": 9.802481107123169e-06, "loss": 0.4659, "step": 6744 }, { "epoch": 1.7071627436092127, "grad_norm": 0.1909535974264145, "learning_rate": 9.800087255809135e-06, "loss": 0.4644, "step": 6745 }, { "epoch": 1.7074158440901037, "grad_norm": 0.17573301494121552, "learning_rate": 9.797693415955671e-06, "loss": 0.4898, "step": 6746 }, { "epoch": 1.7076689445709947, "grad_norm": 0.1666879653930664, "learning_rate": 9.795299587700014e-06, "loss": 0.4833, "step": 6747 }, { "epoch": 1.7079220450518857, "grad_norm": 0.16969414055347443, "learning_rate": 9.7929057711794e-06, "loss": 0.4775, "step": 6748 }, { "epoch": 1.7081751455327765, "grad_norm": 0.16922268271446228, "learning_rate": 9.790511966531055e-06, "loss": 0.4626, "step": 6749 }, { "epoch": 1.7084282460136673, "grad_norm": 0.1622764766216278, "learning_rate": 9.788118173892216e-06, "loss": 0.4714, "step": 6750 }, { "epoch": 1.7086813464945583, "grad_norm": 0.16972225904464722, "learning_rate": 9.785724393400116e-06, "loss": 0.4847, "step": 6751 }, { "epoch": 1.7089344469754493, "grad_norm": 0.16474832594394684, "learning_rate": 9.78333062519198e-06, "loss": 0.4695, "step": 6752 }, { "epoch": 1.7091875474563403, "grad_norm": 0.1628236025571823, "learning_rate": 9.780936869405039e-06, "loss": 0.4763, "step": 6753 }, { "epoch": 1.709440647937231, "grad_norm": 0.17027485370635986, "learning_rate": 9.778543126176522e-06, "loss": 0.482, "step": 6754 }, { "epoch": 1.7096937484181218, "grad_norm": 0.16747045516967773, "learning_rate": 9.776149395643659e-06, "loss": 0.4788, "step": 6755 }, { "epoch": 1.7099468488990128, "grad_norm": 0.17557711899280548, "learning_rate": 9.773755677943678e-06, "loss": 0.51, "step": 6756 }, { "epoch": 1.7101999493799038, "grad_norm": 0.1650334596633911, "learning_rate": 9.771361973213801e-06, "loss": 0.4624, "step": 6757 }, { "epoch": 1.7104530498607948, "grad_norm": 0.16917835175991058, "learning_rate": 9.768968281591258e-06, "loss": 0.481, "step": 6758 }, { "epoch": 1.7107061503416856, "grad_norm": 0.16867350041866302, "learning_rate": 9.766574603213273e-06, "loss": 0.5177, "step": 6759 }, { "epoch": 1.7109592508225766, "grad_norm": 0.16809771955013275, "learning_rate": 9.764180938217071e-06, "loss": 0.4774, "step": 6760 }, { "epoch": 1.7112123513034674, "grad_norm": 0.17253640294075012, "learning_rate": 9.761787286739875e-06, "loss": 0.5016, "step": 6761 }, { "epoch": 1.7114654517843584, "grad_norm": 0.16404761373996735, "learning_rate": 9.759393648918913e-06, "loss": 0.482, "step": 6762 }, { "epoch": 1.7117185522652494, "grad_norm": 0.4515800178050995, "learning_rate": 9.757000024891398e-06, "loss": 0.4581, "step": 6763 }, { "epoch": 1.7119716527461404, "grad_norm": 0.17216229438781738, "learning_rate": 9.754606414794558e-06, "loss": 0.4935, "step": 6764 }, { "epoch": 1.7122247532270312, "grad_norm": 0.1682025045156479, "learning_rate": 9.752212818765609e-06, "loss": 0.4918, "step": 6765 }, { "epoch": 1.712477853707922, "grad_norm": 0.1658981591463089, "learning_rate": 9.749819236941776e-06, "loss": 0.4854, "step": 6766 }, { "epoch": 1.712730954188813, "grad_norm": 0.17206192016601562, "learning_rate": 9.747425669460277e-06, "loss": 0.5189, "step": 6767 }, { "epoch": 1.712984054669704, "grad_norm": 0.16246172785758972, "learning_rate": 9.745032116458324e-06, "loss": 0.4761, "step": 6768 }, { "epoch": 1.713237155150595, "grad_norm": 0.16881144046783447, "learning_rate": 9.742638578073142e-06, "loss": 0.4921, "step": 6769 }, { "epoch": 1.7134902556314857, "grad_norm": 0.1692323088645935, "learning_rate": 9.740245054441945e-06, "loss": 0.4819, "step": 6770 }, { "epoch": 1.7137433561123765, "grad_norm": 0.16364654898643494, "learning_rate": 9.737851545701943e-06, "loss": 0.4716, "step": 6771 }, { "epoch": 1.7139964565932675, "grad_norm": 0.16666771471500397, "learning_rate": 9.735458051990361e-06, "loss": 0.4769, "step": 6772 }, { "epoch": 1.7142495570741585, "grad_norm": 0.17118433117866516, "learning_rate": 9.73306457344441e-06, "loss": 0.4916, "step": 6773 }, { "epoch": 1.7145026575550495, "grad_norm": 0.1891862452030182, "learning_rate": 9.730671110201296e-06, "loss": 0.4901, "step": 6774 }, { "epoch": 1.7147557580359403, "grad_norm": 0.17678427696228027, "learning_rate": 9.728277662398237e-06, "loss": 0.4771, "step": 6775 }, { "epoch": 1.715008858516831, "grad_norm": 0.16176097095012665, "learning_rate": 9.725884230172442e-06, "loss": 0.4944, "step": 6776 }, { "epoch": 1.715261958997722, "grad_norm": 0.16756665706634521, "learning_rate": 9.723490813661123e-06, "loss": 0.4637, "step": 6777 }, { "epoch": 1.715515059478613, "grad_norm": 0.16679075360298157, "learning_rate": 9.721097413001494e-06, "loss": 0.4691, "step": 6778 }, { "epoch": 1.715768159959504, "grad_norm": 0.17277531325817108, "learning_rate": 9.718704028330755e-06, "loss": 0.4733, "step": 6779 }, { "epoch": 1.7160212604403948, "grad_norm": 0.16970132291316986, "learning_rate": 9.716310659786114e-06, "loss": 0.4881, "step": 6780 }, { "epoch": 1.7162743609212856, "grad_norm": 0.1655857264995575, "learning_rate": 9.713917307504785e-06, "loss": 0.474, "step": 6781 }, { "epoch": 1.7165274614021766, "grad_norm": 0.17521385848522186, "learning_rate": 9.711523971623968e-06, "loss": 0.5097, "step": 6782 }, { "epoch": 1.7167805618830676, "grad_norm": 0.16833017766475677, "learning_rate": 9.70913065228087e-06, "loss": 0.4928, "step": 6783 }, { "epoch": 1.7170336623639586, "grad_norm": 0.16971883177757263, "learning_rate": 9.706737349612697e-06, "loss": 0.5069, "step": 6784 }, { "epoch": 1.7172867628448494, "grad_norm": 0.17262811958789825, "learning_rate": 9.70434406375665e-06, "loss": 0.4701, "step": 6785 }, { "epoch": 1.7175398633257402, "grad_norm": 0.1674506664276123, "learning_rate": 9.701950794849929e-06, "loss": 0.4707, "step": 6786 }, { "epoch": 1.7177929638066312, "grad_norm": 0.16839599609375, "learning_rate": 9.699557543029734e-06, "loss": 0.4844, "step": 6787 }, { "epoch": 1.7180460642875222, "grad_norm": 0.1650979220867157, "learning_rate": 9.69716430843327e-06, "loss": 0.4817, "step": 6788 }, { "epoch": 1.7182991647684132, "grad_norm": 0.16211150586605072, "learning_rate": 9.694771091197737e-06, "loss": 0.4865, "step": 6789 }, { "epoch": 1.718552265249304, "grad_norm": 0.16909381747245789, "learning_rate": 9.692377891460326e-06, "loss": 0.4587, "step": 6790 }, { "epoch": 1.718805365730195, "grad_norm": 0.16401228308677673, "learning_rate": 9.689984709358238e-06, "loss": 0.4942, "step": 6791 }, { "epoch": 1.7190584662110857, "grad_norm": 0.16727478802204132, "learning_rate": 9.687591545028672e-06, "loss": 0.4689, "step": 6792 }, { "epoch": 1.7193115666919767, "grad_norm": 0.16636009514331818, "learning_rate": 9.685198398608815e-06, "loss": 0.4851, "step": 6793 }, { "epoch": 1.7195646671728677, "grad_norm": 0.1773521453142166, "learning_rate": 9.682805270235869e-06, "loss": 0.4683, "step": 6794 }, { "epoch": 1.7198177676537585, "grad_norm": 0.16708840429782867, "learning_rate": 9.680412160047029e-06, "loss": 0.482, "step": 6795 }, { "epoch": 1.7200708681346495, "grad_norm": 0.17562609910964966, "learning_rate": 9.678019068179474e-06, "loss": 0.4907, "step": 6796 }, { "epoch": 1.7203239686155403, "grad_norm": 0.16516806185245514, "learning_rate": 9.675625994770405e-06, "loss": 0.4975, "step": 6797 }, { "epoch": 1.7205770690964313, "grad_norm": 0.16803544759750366, "learning_rate": 9.673232939957012e-06, "loss": 0.4756, "step": 6798 }, { "epoch": 1.7208301695773223, "grad_norm": 0.16654779016971588, "learning_rate": 9.670839903876479e-06, "loss": 0.4844, "step": 6799 }, { "epoch": 1.7210832700582133, "grad_norm": 0.20831449329853058, "learning_rate": 9.668446886665995e-06, "loss": 0.4898, "step": 6800 }, { "epoch": 1.721336370539104, "grad_norm": 0.1639193445444107, "learning_rate": 9.666053888462754e-06, "loss": 0.4713, "step": 6801 }, { "epoch": 1.7215894710199948, "grad_norm": 0.16916580498218536, "learning_rate": 9.663660909403928e-06, "loss": 0.484, "step": 6802 }, { "epoch": 1.7218425715008858, "grad_norm": 0.1658213883638382, "learning_rate": 9.661267949626711e-06, "loss": 0.4946, "step": 6803 }, { "epoch": 1.7220956719817768, "grad_norm": 0.16539451479911804, "learning_rate": 9.658875009268285e-06, "loss": 0.4671, "step": 6804 }, { "epoch": 1.7223487724626678, "grad_norm": 0.17393343150615692, "learning_rate": 9.656482088465831e-06, "loss": 0.4873, "step": 6805 }, { "epoch": 1.7226018729435586, "grad_norm": 0.16385428607463837, "learning_rate": 9.654089187356532e-06, "loss": 0.4548, "step": 6806 }, { "epoch": 1.7228549734244494, "grad_norm": 0.16260235011577606, "learning_rate": 9.651696306077563e-06, "loss": 0.4566, "step": 6807 }, { "epoch": 1.7231080739053404, "grad_norm": 0.1821790486574173, "learning_rate": 9.649303444766107e-06, "loss": 0.486, "step": 6808 }, { "epoch": 1.7233611743862314, "grad_norm": 0.16815070807933807, "learning_rate": 9.646910603559337e-06, "loss": 0.502, "step": 6809 }, { "epoch": 1.7236142748671224, "grad_norm": 0.16375146806240082, "learning_rate": 9.644517782594435e-06, "loss": 0.4794, "step": 6810 }, { "epoch": 1.7238673753480132, "grad_norm": 0.1614801436662674, "learning_rate": 9.642124982008577e-06, "loss": 0.4935, "step": 6811 }, { "epoch": 1.724120475828904, "grad_norm": 0.1650935709476471, "learning_rate": 9.639732201938936e-06, "loss": 0.4854, "step": 6812 }, { "epoch": 1.724373576309795, "grad_norm": 0.1684967428445816, "learning_rate": 9.63733944252268e-06, "loss": 0.4954, "step": 6813 }, { "epoch": 1.724626676790686, "grad_norm": 0.18186303973197937, "learning_rate": 9.634946703896987e-06, "loss": 0.4908, "step": 6814 }, { "epoch": 1.724879777271577, "grad_norm": 0.17614726722240448, "learning_rate": 9.632553986199022e-06, "loss": 0.4848, "step": 6815 }, { "epoch": 1.7251328777524677, "grad_norm": 0.21927617490291595, "learning_rate": 9.630161289565958e-06, "loss": 0.4821, "step": 6816 }, { "epoch": 1.7253859782333585, "grad_norm": 0.16999921202659607, "learning_rate": 9.627768614134967e-06, "loss": 0.5025, "step": 6817 }, { "epoch": 1.7256390787142495, "grad_norm": 0.16675658524036407, "learning_rate": 9.625375960043208e-06, "loss": 0.4982, "step": 6818 }, { "epoch": 1.7258921791951405, "grad_norm": 0.16377942264080048, "learning_rate": 9.622983327427851e-06, "loss": 0.4769, "step": 6819 }, { "epoch": 1.7261452796760315, "grad_norm": 0.16608569025993347, "learning_rate": 9.62059071642606e-06, "loss": 0.4689, "step": 6820 }, { "epoch": 1.7263983801569223, "grad_norm": 0.16264139115810394, "learning_rate": 9.618198127174999e-06, "loss": 0.472, "step": 6821 }, { "epoch": 1.7266514806378133, "grad_norm": 0.17424054443836212, "learning_rate": 9.615805559811828e-06, "loss": 0.4733, "step": 6822 }, { "epoch": 1.726904581118704, "grad_norm": 0.16458122432231903, "learning_rate": 9.613413014473713e-06, "loss": 0.4865, "step": 6823 }, { "epoch": 1.727157681599595, "grad_norm": 0.16448380053043365, "learning_rate": 9.611020491297806e-06, "loss": 0.4575, "step": 6824 }, { "epoch": 1.727410782080486, "grad_norm": 0.1723375916481018, "learning_rate": 9.608627990421272e-06, "loss": 0.487, "step": 6825 }, { "epoch": 1.7276638825613768, "grad_norm": 0.16148555278778076, "learning_rate": 9.606235511981262e-06, "loss": 0.4619, "step": 6826 }, { "epoch": 1.7279169830422678, "grad_norm": 0.1635066270828247, "learning_rate": 9.603843056114937e-06, "loss": 0.4687, "step": 6827 }, { "epoch": 1.7281700835231586, "grad_norm": 0.17095163464546204, "learning_rate": 9.60145062295945e-06, "loss": 0.4561, "step": 6828 }, { "epoch": 1.7284231840040496, "grad_norm": 0.16227427124977112, "learning_rate": 9.599058212651953e-06, "loss": 0.4994, "step": 6829 }, { "epoch": 1.7286762844849406, "grad_norm": 0.16435149312019348, "learning_rate": 9.596665825329597e-06, "loss": 0.4699, "step": 6830 }, { "epoch": 1.7289293849658316, "grad_norm": 0.20602776110172272, "learning_rate": 9.59427346112953e-06, "loss": 0.463, "step": 6831 }, { "epoch": 1.7291824854467224, "grad_norm": 0.16697001457214355, "learning_rate": 9.59188112018891e-06, "loss": 0.5047, "step": 6832 }, { "epoch": 1.7294355859276132, "grad_norm": 0.16215075552463531, "learning_rate": 9.58948880264488e-06, "loss": 0.4978, "step": 6833 }, { "epoch": 1.7296886864085042, "grad_norm": 0.16362696886062622, "learning_rate": 9.587096508634585e-06, "loss": 0.4756, "step": 6834 }, { "epoch": 1.7299417868893951, "grad_norm": 0.16182459890842438, "learning_rate": 9.58470423829517e-06, "loss": 0.4519, "step": 6835 }, { "epoch": 1.7301948873702861, "grad_norm": 0.1656409054994583, "learning_rate": 9.582311991763781e-06, "loss": 0.5008, "step": 6836 }, { "epoch": 1.730447987851177, "grad_norm": 0.1643560379743576, "learning_rate": 9.579919769177556e-06, "loss": 0.4555, "step": 6837 }, { "epoch": 1.7307010883320677, "grad_norm": 0.17004622519016266, "learning_rate": 9.577527570673644e-06, "loss": 0.4892, "step": 6838 }, { "epoch": 1.7309541888129587, "grad_norm": 0.16334494948387146, "learning_rate": 9.575135396389183e-06, "loss": 0.4795, "step": 6839 }, { "epoch": 1.7312072892938497, "grad_norm": 0.16685959696769714, "learning_rate": 9.572743246461304e-06, "loss": 0.4292, "step": 6840 }, { "epoch": 1.7314603897747407, "grad_norm": 0.16413308680057526, "learning_rate": 9.570351121027149e-06, "loss": 0.456, "step": 6841 }, { "epoch": 1.7317134902556315, "grad_norm": 0.16830222308635712, "learning_rate": 9.567959020223852e-06, "loss": 0.4692, "step": 6842 }, { "epoch": 1.7319665907365223, "grad_norm": 0.16979405283927917, "learning_rate": 9.565566944188549e-06, "loss": 0.499, "step": 6843 }, { "epoch": 1.7322196912174133, "grad_norm": 0.16267086565494537, "learning_rate": 9.563174893058369e-06, "loss": 0.4965, "step": 6844 }, { "epoch": 1.7324727916983043, "grad_norm": 0.16059516370296478, "learning_rate": 9.560782866970452e-06, "loss": 0.4804, "step": 6845 }, { "epoch": 1.7327258921791953, "grad_norm": 0.16375164687633514, "learning_rate": 9.558390866061916e-06, "loss": 0.479, "step": 6846 }, { "epoch": 1.732978992660086, "grad_norm": 0.16660970449447632, "learning_rate": 9.555998890469895e-06, "loss": 0.4666, "step": 6847 }, { "epoch": 1.7332320931409768, "grad_norm": 0.17003916203975677, "learning_rate": 9.553606940331518e-06, "loss": 0.4758, "step": 6848 }, { "epoch": 1.7334851936218678, "grad_norm": 0.17121854424476624, "learning_rate": 9.551215015783907e-06, "loss": 0.49, "step": 6849 }, { "epoch": 1.7337382941027588, "grad_norm": 0.16399338841438293, "learning_rate": 9.54882311696419e-06, "loss": 0.4868, "step": 6850 }, { "epoch": 1.7339913945836498, "grad_norm": 0.16992086172103882, "learning_rate": 9.546431244009484e-06, "loss": 0.4868, "step": 6851 }, { "epoch": 1.7342444950645406, "grad_norm": 0.16450315713882446, "learning_rate": 9.544039397056913e-06, "loss": 0.4905, "step": 6852 }, { "epoch": 1.7344975955454314, "grad_norm": 0.16418522596359253, "learning_rate": 9.541647576243594e-06, "loss": 0.4685, "step": 6853 }, { "epoch": 1.7347506960263224, "grad_norm": 0.17265629768371582, "learning_rate": 9.539255781706648e-06, "loss": 0.4957, "step": 6854 }, { "epoch": 1.7350037965072134, "grad_norm": 0.1894061118364334, "learning_rate": 9.536864013583193e-06, "loss": 0.4731, "step": 6855 }, { "epoch": 1.7352568969881044, "grad_norm": 0.17494072020053864, "learning_rate": 9.534472272010343e-06, "loss": 0.4852, "step": 6856 }, { "epoch": 1.7355099974689951, "grad_norm": 0.2580101788043976, "learning_rate": 9.532080557125208e-06, "loss": 0.4684, "step": 6857 }, { "epoch": 1.7357630979498861, "grad_norm": 0.16834989190101624, "learning_rate": 9.529688869064901e-06, "loss": 0.4949, "step": 6858 }, { "epoch": 1.736016198430777, "grad_norm": 4.211454391479492, "learning_rate": 9.527297207966532e-06, "loss": 0.6081, "step": 6859 }, { "epoch": 1.736269298911668, "grad_norm": 0.16508643329143524, "learning_rate": 9.524905573967214e-06, "loss": 0.4982, "step": 6860 }, { "epoch": 1.736522399392559, "grad_norm": 0.18229559063911438, "learning_rate": 9.52251396720405e-06, "loss": 0.4741, "step": 6861 }, { "epoch": 1.73677549987345, "grad_norm": 0.17179259657859802, "learning_rate": 9.520122387814153e-06, "loss": 0.4843, "step": 6862 }, { "epoch": 1.7370286003543407, "grad_norm": 0.170107901096344, "learning_rate": 9.517730835934616e-06, "loss": 0.501, "step": 6863 }, { "epoch": 1.7372817008352315, "grad_norm": 0.16533425450325012, "learning_rate": 9.51533931170255e-06, "loss": 0.469, "step": 6864 }, { "epoch": 1.7375348013161225, "grad_norm": 0.16275399923324585, "learning_rate": 9.51294781525505e-06, "loss": 0.4654, "step": 6865 }, { "epoch": 1.7377879017970135, "grad_norm": 0.16305772960186005, "learning_rate": 9.510556346729218e-06, "loss": 0.5052, "step": 6866 }, { "epoch": 1.7380410022779045, "grad_norm": 0.16935844719409943, "learning_rate": 9.50816490626216e-06, "loss": 0.4732, "step": 6867 }, { "epoch": 1.7382941027587953, "grad_norm": 0.171352356672287, "learning_rate": 9.505773493990956e-06, "loss": 0.4954, "step": 6868 }, { "epoch": 1.738547203239686, "grad_norm": 0.16739875078201294, "learning_rate": 9.503382110052714e-06, "loss": 0.4709, "step": 6869 }, { "epoch": 1.738800303720577, "grad_norm": 0.16673533618450165, "learning_rate": 9.50099075458452e-06, "loss": 0.4897, "step": 6870 }, { "epoch": 1.739053404201468, "grad_norm": 0.16723501682281494, "learning_rate": 9.498599427723467e-06, "loss": 0.4704, "step": 6871 }, { "epoch": 1.739306504682359, "grad_norm": 0.17021973431110382, "learning_rate": 9.496208129606645e-06, "loss": 0.4922, "step": 6872 }, { "epoch": 1.7395596051632498, "grad_norm": 0.16896764934062958, "learning_rate": 9.493816860371147e-06, "loss": 0.4652, "step": 6873 }, { "epoch": 1.7398127056441406, "grad_norm": 0.16638244688510895, "learning_rate": 9.491425620154053e-06, "loss": 0.4825, "step": 6874 }, { "epoch": 1.7400658061250316, "grad_norm": 0.1734493225812912, "learning_rate": 9.489034409092444e-06, "loss": 0.4938, "step": 6875 }, { "epoch": 1.7403189066059226, "grad_norm": 0.16888616979122162, "learning_rate": 9.486643227323413e-06, "loss": 0.4872, "step": 6876 }, { "epoch": 1.7405720070868136, "grad_norm": 0.16770464181900024, "learning_rate": 9.484252074984036e-06, "loss": 0.4613, "step": 6877 }, { "epoch": 1.7408251075677044, "grad_norm": 0.1656063348054886, "learning_rate": 9.481860952211395e-06, "loss": 0.4743, "step": 6878 }, { "epoch": 1.7410782080485951, "grad_norm": 0.1675337702035904, "learning_rate": 9.479469859142565e-06, "loss": 0.5073, "step": 6879 }, { "epoch": 1.7413313085294861, "grad_norm": 0.1647523194551468, "learning_rate": 9.477078795914625e-06, "loss": 0.4675, "step": 6880 }, { "epoch": 1.7415844090103771, "grad_norm": 0.16921988129615784, "learning_rate": 9.474687762664644e-06, "loss": 0.4809, "step": 6881 }, { "epoch": 1.7418375094912681, "grad_norm": 0.1698898822069168, "learning_rate": 9.472296759529704e-06, "loss": 0.5054, "step": 6882 }, { "epoch": 1.742090609972159, "grad_norm": 0.16974307596683502, "learning_rate": 9.46990578664687e-06, "loss": 0.4675, "step": 6883 }, { "epoch": 1.7423437104530497, "grad_norm": 0.16368524730205536, "learning_rate": 9.467514844153213e-06, "loss": 0.4495, "step": 6884 }, { "epoch": 1.7425968109339407, "grad_norm": 0.16836228966712952, "learning_rate": 9.4651239321858e-06, "loss": 0.4681, "step": 6885 }, { "epoch": 1.7428499114148317, "grad_norm": 0.1696322113275528, "learning_rate": 9.462733050881698e-06, "loss": 0.5132, "step": 6886 }, { "epoch": 1.7431030118957227, "grad_norm": 0.17749930918216705, "learning_rate": 9.46034220037797e-06, "loss": 0.4817, "step": 6887 }, { "epoch": 1.7433561123766135, "grad_norm": 0.16833819448947906, "learning_rate": 9.457951380811677e-06, "loss": 0.5023, "step": 6888 }, { "epoch": 1.7436092128575045, "grad_norm": 0.16830146312713623, "learning_rate": 9.455560592319888e-06, "loss": 0.4767, "step": 6889 }, { "epoch": 1.7438623133383953, "grad_norm": 0.17059051990509033, "learning_rate": 9.453169835039648e-06, "loss": 0.4824, "step": 6890 }, { "epoch": 1.7441154138192863, "grad_norm": 0.16638414561748505, "learning_rate": 9.450779109108024e-06, "loss": 0.4937, "step": 6891 }, { "epoch": 1.7443685143001773, "grad_norm": 0.17462825775146484, "learning_rate": 9.448388414662069e-06, "loss": 0.4854, "step": 6892 }, { "epoch": 1.744621614781068, "grad_norm": 0.15856239199638367, "learning_rate": 9.445997751838835e-06, "loss": 0.4601, "step": 6893 }, { "epoch": 1.744874715261959, "grad_norm": 0.17154696583747864, "learning_rate": 9.443607120775377e-06, "loss": 0.4692, "step": 6894 }, { "epoch": 1.7451278157428498, "grad_norm": 0.16387785971164703, "learning_rate": 9.441216521608743e-06, "loss": 0.4668, "step": 6895 }, { "epoch": 1.7453809162237408, "grad_norm": 0.16854718327522278, "learning_rate": 9.438825954475977e-06, "loss": 0.4718, "step": 6896 }, { "epoch": 1.7456340167046318, "grad_norm": 0.17270982265472412, "learning_rate": 9.43643541951413e-06, "loss": 0.4834, "step": 6897 }, { "epoch": 1.7458871171855228, "grad_norm": 0.16778072714805603, "learning_rate": 9.434044916860245e-06, "loss": 0.4878, "step": 6898 }, { "epoch": 1.7461402176664136, "grad_norm": 0.16705740988254547, "learning_rate": 9.431654446651363e-06, "loss": 0.4922, "step": 6899 }, { "epoch": 1.7463933181473044, "grad_norm": 0.17117808759212494, "learning_rate": 9.42926400902453e-06, "loss": 0.4989, "step": 6900 }, { "epoch": 1.7466464186281954, "grad_norm": 0.16874170303344727, "learning_rate": 9.426873604116779e-06, "loss": 0.4608, "step": 6901 }, { "epoch": 1.7468995191090864, "grad_norm": 0.17093709111213684, "learning_rate": 9.424483232065146e-06, "loss": 0.4778, "step": 6902 }, { "epoch": 1.7471526195899774, "grad_norm": 0.16945087909698486, "learning_rate": 9.422092893006669e-06, "loss": 0.4904, "step": 6903 }, { "epoch": 1.7474057200708681, "grad_norm": 0.167316272854805, "learning_rate": 9.419702587078383e-06, "loss": 0.4783, "step": 6904 }, { "epoch": 1.747658820551759, "grad_norm": 0.1649433672428131, "learning_rate": 9.417312314417315e-06, "loss": 0.4923, "step": 6905 }, { "epoch": 1.74791192103265, "grad_norm": 0.1901371330022812, "learning_rate": 9.414922075160498e-06, "loss": 0.4848, "step": 6906 }, { "epoch": 1.748165021513541, "grad_norm": 0.16857343912124634, "learning_rate": 9.412531869444956e-06, "loss": 0.4822, "step": 6907 }, { "epoch": 1.748418121994432, "grad_norm": 0.16434934735298157, "learning_rate": 9.410141697407717e-06, "loss": 0.4762, "step": 6908 }, { "epoch": 1.7486712224753227, "grad_norm": 0.16747720539569855, "learning_rate": 9.407751559185801e-06, "loss": 0.4969, "step": 6909 }, { "epoch": 1.7489243229562135, "grad_norm": 0.1705252081155777, "learning_rate": 9.40536145491623e-06, "loss": 0.4942, "step": 6910 }, { "epoch": 1.7491774234371045, "grad_norm": 0.16569790244102478, "learning_rate": 9.402971384736033e-06, "loss": 0.4544, "step": 6911 }, { "epoch": 1.7494305239179955, "grad_norm": 0.16756413877010345, "learning_rate": 9.400581348782214e-06, "loss": 0.4592, "step": 6912 }, { "epoch": 1.7496836243988865, "grad_norm": 0.1677793264389038, "learning_rate": 9.398191347191795e-06, "loss": 0.4958, "step": 6913 }, { "epoch": 1.7499367248797773, "grad_norm": 0.19030047953128815, "learning_rate": 9.39580138010179e-06, "loss": 0.4733, "step": 6914 }, { "epoch": 1.750189825360668, "grad_norm": 0.16542237997055054, "learning_rate": 9.39341144764921e-06, "loss": 0.4673, "step": 6915 }, { "epoch": 1.750442925841559, "grad_norm": 0.16591395437717438, "learning_rate": 9.391021549971067e-06, "loss": 0.4608, "step": 6916 }, { "epoch": 1.75069602632245, "grad_norm": 0.1682930886745453, "learning_rate": 9.388631687204368e-06, "loss": 0.482, "step": 6917 }, { "epoch": 1.750949126803341, "grad_norm": 0.17850051820278168, "learning_rate": 9.386241859486114e-06, "loss": 0.4901, "step": 6918 }, { "epoch": 1.7512022272842318, "grad_norm": 0.18162712454795837, "learning_rate": 9.383852066953311e-06, "loss": 0.4697, "step": 6919 }, { "epoch": 1.7514553277651228, "grad_norm": 0.179778590798378, "learning_rate": 9.381462309742965e-06, "loss": 0.497, "step": 6920 }, { "epoch": 1.7517084282460136, "grad_norm": 0.2903284430503845, "learning_rate": 9.37907258799207e-06, "loss": 0.4765, "step": 6921 }, { "epoch": 1.7519615287269046, "grad_norm": 0.16395986080169678, "learning_rate": 9.37668290183763e-06, "loss": 0.495, "step": 6922 }, { "epoch": 1.7522146292077956, "grad_norm": 0.17313700914382935, "learning_rate": 9.374293251416638e-06, "loss": 0.4895, "step": 6923 }, { "epoch": 1.7524677296886864, "grad_norm": 0.16439716517925262, "learning_rate": 9.371903636866083e-06, "loss": 0.4941, "step": 6924 }, { "epoch": 1.7527208301695774, "grad_norm": 0.1682966649532318, "learning_rate": 9.36951405832296e-06, "loss": 0.484, "step": 6925 }, { "epoch": 1.7529739306504681, "grad_norm": 0.16743111610412598, "learning_rate": 9.36712451592426e-06, "loss": 0.4658, "step": 6926 }, { "epoch": 1.7532270311313591, "grad_norm": 0.16707782447338104, "learning_rate": 9.364735009806967e-06, "loss": 0.4789, "step": 6927 }, { "epoch": 1.7534801316122501, "grad_norm": 0.17747798562049866, "learning_rate": 9.362345540108074e-06, "loss": 0.5125, "step": 6928 }, { "epoch": 1.7537332320931411, "grad_norm": 0.1629725843667984, "learning_rate": 9.359956106964552e-06, "loss": 0.4782, "step": 6929 }, { "epoch": 1.753986332574032, "grad_norm": 0.16596485674381256, "learning_rate": 9.357566710513393e-06, "loss": 0.4623, "step": 6930 }, { "epoch": 1.7542394330549227, "grad_norm": 0.16533876955509186, "learning_rate": 9.35517735089157e-06, "loss": 0.4917, "step": 6931 }, { "epoch": 1.7544925335358137, "grad_norm": 0.1673717200756073, "learning_rate": 9.35278802823606e-06, "loss": 0.4874, "step": 6932 }, { "epoch": 1.7547456340167047, "grad_norm": 0.20833104848861694, "learning_rate": 9.35039874268384e-06, "loss": 0.4573, "step": 6933 }, { "epoch": 1.7549987344975957, "grad_norm": 0.16671670973300934, "learning_rate": 9.348009494371886e-06, "loss": 0.4886, "step": 6934 }, { "epoch": 1.7552518349784865, "grad_norm": 0.1699654906988144, "learning_rate": 9.34562028343716e-06, "loss": 0.486, "step": 6935 }, { "epoch": 1.7555049354593772, "grad_norm": 0.16722513735294342, "learning_rate": 9.343231110016638e-06, "loss": 0.4803, "step": 6936 }, { "epoch": 1.7557580359402682, "grad_norm": 0.16739751398563385, "learning_rate": 9.340841974247283e-06, "loss": 0.4796, "step": 6937 }, { "epoch": 1.7560111364211592, "grad_norm": 0.17406658828258514, "learning_rate": 9.33845287626606e-06, "loss": 0.4783, "step": 6938 }, { "epoch": 1.7562642369020502, "grad_norm": 0.17532232403755188, "learning_rate": 9.336063816209929e-06, "loss": 0.495, "step": 6939 }, { "epoch": 1.756517337382941, "grad_norm": 0.16142593324184418, "learning_rate": 9.333674794215853e-06, "loss": 0.4732, "step": 6940 }, { "epoch": 1.7567704378638318, "grad_norm": 0.16808964312076569, "learning_rate": 9.331285810420782e-06, "loss": 0.4762, "step": 6941 }, { "epoch": 1.7570235383447228, "grad_norm": 0.1794271618127823, "learning_rate": 9.32889686496168e-06, "loss": 0.4677, "step": 6942 }, { "epoch": 1.7572766388256138, "grad_norm": 0.16722774505615234, "learning_rate": 9.326507957975499e-06, "loss": 0.4859, "step": 6943 }, { "epoch": 1.7575297393065048, "grad_norm": 0.16859862208366394, "learning_rate": 9.324119089599186e-06, "loss": 0.5051, "step": 6944 }, { "epoch": 1.7577828397873956, "grad_norm": 0.17183734476566315, "learning_rate": 9.321730259969695e-06, "loss": 0.4552, "step": 6945 }, { "epoch": 1.7580359402682864, "grad_norm": 0.16478818655014038, "learning_rate": 9.319341469223968e-06, "loss": 0.4575, "step": 6946 }, { "epoch": 1.7582890407491774, "grad_norm": 0.1679842621088028, "learning_rate": 9.31695271749895e-06, "loss": 0.4933, "step": 6947 }, { "epoch": 1.7585421412300684, "grad_norm": 0.16957353055477142, "learning_rate": 9.31456400493158e-06, "loss": 0.4571, "step": 6948 }, { "epoch": 1.7587952417109594, "grad_norm": 0.17338255047798157, "learning_rate": 9.312175331658805e-06, "loss": 0.4731, "step": 6949 }, { "epoch": 1.7590483421918501, "grad_norm": 0.17478033900260925, "learning_rate": 9.309786697817564e-06, "loss": 0.492, "step": 6950 }, { "epoch": 1.7593014426727411, "grad_norm": 0.1639743447303772, "learning_rate": 9.307398103544783e-06, "loss": 0.4862, "step": 6951 }, { "epoch": 1.759554543153632, "grad_norm": 0.16621550917625427, "learning_rate": 9.305009548977399e-06, "loss": 0.4734, "step": 6952 }, { "epoch": 1.759807643634523, "grad_norm": 0.16554655134677887, "learning_rate": 9.302621034252344e-06, "loss": 0.4808, "step": 6953 }, { "epoch": 1.760060744115414, "grad_norm": 0.16693361103534698, "learning_rate": 9.300232559506543e-06, "loss": 0.4689, "step": 6954 }, { "epoch": 1.7603138445963047, "grad_norm": 0.16863131523132324, "learning_rate": 9.297844124876929e-06, "loss": 0.4845, "step": 6955 }, { "epoch": 1.7605669450771957, "grad_norm": 0.16777345538139343, "learning_rate": 9.295455730500424e-06, "loss": 0.4868, "step": 6956 }, { "epoch": 1.7608200455580865, "grad_norm": 0.16649167239665985, "learning_rate": 9.293067376513942e-06, "loss": 0.4807, "step": 6957 }, { "epoch": 1.7610731460389775, "grad_norm": 0.16671016812324524, "learning_rate": 9.29067906305441e-06, "loss": 0.4907, "step": 6958 }, { "epoch": 1.7613262465198685, "grad_norm": 0.18288390338420868, "learning_rate": 9.288290790258741e-06, "loss": 0.488, "step": 6959 }, { "epoch": 1.7615793470007595, "grad_norm": 0.1702820360660553, "learning_rate": 9.285902558263853e-06, "loss": 0.5006, "step": 6960 }, { "epoch": 1.7618324474816502, "grad_norm": 0.17033761739730835, "learning_rate": 9.283514367206659e-06, "loss": 0.4877, "step": 6961 }, { "epoch": 1.762085547962541, "grad_norm": 0.16725802421569824, "learning_rate": 9.281126217224064e-06, "loss": 0.4621, "step": 6962 }, { "epoch": 1.762338648443432, "grad_norm": 0.1672438532114029, "learning_rate": 9.278738108452974e-06, "loss": 0.4885, "step": 6963 }, { "epoch": 1.762591748924323, "grad_norm": 0.16424107551574707, "learning_rate": 9.276350041030303e-06, "loss": 0.4708, "step": 6964 }, { "epoch": 1.762844849405214, "grad_norm": 0.17014002799987793, "learning_rate": 9.273962015092949e-06, "loss": 0.4704, "step": 6965 }, { "epoch": 1.7630979498861048, "grad_norm": 0.16585522890090942, "learning_rate": 9.271574030777811e-06, "loss": 0.4843, "step": 6966 }, { "epoch": 1.7633510503669956, "grad_norm": 0.16645929217338562, "learning_rate": 9.269186088221793e-06, "loss": 0.4865, "step": 6967 }, { "epoch": 1.7636041508478866, "grad_norm": 0.16586482524871826, "learning_rate": 9.266798187561782e-06, "loss": 0.4674, "step": 6968 }, { "epoch": 1.7638572513287776, "grad_norm": 0.16732823848724365, "learning_rate": 9.264410328934677e-06, "loss": 0.499, "step": 6969 }, { "epoch": 1.7641103518096686, "grad_norm": 0.16658686101436615, "learning_rate": 9.262022512477363e-06, "loss": 0.4714, "step": 6970 }, { "epoch": 1.7643634522905594, "grad_norm": 0.1671464890241623, "learning_rate": 9.259634738326738e-06, "loss": 0.4657, "step": 6971 }, { "epoch": 1.7646165527714501, "grad_norm": 0.16643331944942474, "learning_rate": 9.257247006619683e-06, "loss": 0.5045, "step": 6972 }, { "epoch": 1.7648696532523411, "grad_norm": 0.1648384928703308, "learning_rate": 9.25485931749308e-06, "loss": 0.4795, "step": 6973 }, { "epoch": 1.7651227537332321, "grad_norm": 0.16552092134952545, "learning_rate": 9.252471671083813e-06, "loss": 0.4974, "step": 6974 }, { "epoch": 1.7653758542141231, "grad_norm": 0.16564513742923737, "learning_rate": 9.250084067528757e-06, "loss": 0.4826, "step": 6975 }, { "epoch": 1.765628954695014, "grad_norm": 0.17558008432388306, "learning_rate": 9.247696506964791e-06, "loss": 0.5097, "step": 6976 }, { "epoch": 1.7658820551759047, "grad_norm": 0.16716350615024567, "learning_rate": 9.24530898952879e-06, "loss": 0.4837, "step": 6977 }, { "epoch": 1.7661351556567957, "grad_norm": 0.16862943768501282, "learning_rate": 9.242921515357627e-06, "loss": 0.4608, "step": 6978 }, { "epoch": 1.7663882561376867, "grad_norm": 0.1660955399274826, "learning_rate": 9.240534084588161e-06, "loss": 0.4939, "step": 6979 }, { "epoch": 1.7666413566185777, "grad_norm": 0.1682509183883667, "learning_rate": 9.238146697357268e-06, "loss": 0.4804, "step": 6980 }, { "epoch": 1.7668944570994685, "grad_norm": 0.17387434840202332, "learning_rate": 9.235759353801808e-06, "loss": 0.4726, "step": 6981 }, { "epoch": 1.7671475575803592, "grad_norm": 0.16988109052181244, "learning_rate": 9.233372054058644e-06, "loss": 0.5097, "step": 6982 }, { "epoch": 1.7674006580612502, "grad_norm": 0.16897307336330414, "learning_rate": 9.230984798264632e-06, "loss": 0.4898, "step": 6983 }, { "epoch": 1.7676537585421412, "grad_norm": 0.16941790282726288, "learning_rate": 9.228597586556637e-06, "loss": 0.4945, "step": 6984 }, { "epoch": 1.7679068590230322, "grad_norm": 0.16979052126407623, "learning_rate": 9.226210419071498e-06, "loss": 0.485, "step": 6985 }, { "epoch": 1.768159959503923, "grad_norm": 0.16603583097457886, "learning_rate": 9.223823295946076e-06, "loss": 0.4841, "step": 6986 }, { "epoch": 1.768413059984814, "grad_norm": 0.17356260120868683, "learning_rate": 9.221436217317219e-06, "loss": 0.5037, "step": 6987 }, { "epoch": 1.7686661604657048, "grad_norm": 0.1614154577255249, "learning_rate": 9.219049183321772e-06, "loss": 0.4583, "step": 6988 }, { "epoch": 1.7689192609465958, "grad_norm": 0.16631822288036346, "learning_rate": 9.216662194096582e-06, "loss": 0.486, "step": 6989 }, { "epoch": 1.7691723614274868, "grad_norm": 0.16713625192642212, "learning_rate": 9.214275249778484e-06, "loss": 0.4798, "step": 6990 }, { "epoch": 1.7694254619083776, "grad_norm": 0.16603118181228638, "learning_rate": 9.211888350504318e-06, "loss": 0.4847, "step": 6991 }, { "epoch": 1.7696785623892686, "grad_norm": 0.16162800788879395, "learning_rate": 9.20950149641092e-06, "loss": 0.4403, "step": 6992 }, { "epoch": 1.7699316628701594, "grad_norm": 0.16963225603103638, "learning_rate": 9.207114687635126e-06, "loss": 0.4946, "step": 6993 }, { "epoch": 1.7701847633510503, "grad_norm": 0.17010769248008728, "learning_rate": 9.204727924313766e-06, "loss": 0.4567, "step": 6994 }, { "epoch": 1.7704378638319413, "grad_norm": 0.1798984855413437, "learning_rate": 9.202341206583669e-06, "loss": 0.4912, "step": 6995 }, { "epoch": 1.7706909643128323, "grad_norm": 0.16433045268058777, "learning_rate": 9.199954534581655e-06, "loss": 0.4784, "step": 6996 }, { "epoch": 1.7709440647937231, "grad_norm": 0.1672849953174591, "learning_rate": 9.197567908444552e-06, "loss": 0.4779, "step": 6997 }, { "epoch": 1.771197165274614, "grad_norm": 0.18533079326152802, "learning_rate": 9.195181328309177e-06, "loss": 0.492, "step": 6998 }, { "epoch": 1.771450265755505, "grad_norm": 0.16386374831199646, "learning_rate": 9.19279479431235e-06, "loss": 0.4804, "step": 6999 }, { "epoch": 1.771703366236396, "grad_norm": 0.18040058016777039, "learning_rate": 9.19040830659089e-06, "loss": 0.4694, "step": 7000 }, { "epoch": 1.771956466717287, "grad_norm": 0.16442139446735382, "learning_rate": 9.188021865281598e-06, "loss": 0.4925, "step": 7001 }, { "epoch": 1.7722095671981777, "grad_norm": 0.1626272052526474, "learning_rate": 9.185635470521291e-06, "loss": 0.4797, "step": 7002 }, { "epoch": 1.7724626676790685, "grad_norm": 0.17136268317699432, "learning_rate": 9.183249122446777e-06, "loss": 0.4832, "step": 7003 }, { "epoch": 1.7727157681599595, "grad_norm": 0.16506743431091309, "learning_rate": 9.180862821194858e-06, "loss": 0.4774, "step": 7004 }, { "epoch": 1.7729688686408505, "grad_norm": 0.16300910711288452, "learning_rate": 9.178476566902333e-06, "loss": 0.4861, "step": 7005 }, { "epoch": 1.7732219691217415, "grad_norm": 0.16379359364509583, "learning_rate": 9.17609035970601e-06, "loss": 0.4691, "step": 7006 }, { "epoch": 1.7734750696026322, "grad_norm": 0.17352890968322754, "learning_rate": 9.173704199742672e-06, "loss": 0.4887, "step": 7007 }, { "epoch": 1.773728170083523, "grad_norm": 0.173080176115036, "learning_rate": 9.171318087149123e-06, "loss": 0.4596, "step": 7008 }, { "epoch": 1.773981270564414, "grad_norm": 0.16731397807598114, "learning_rate": 9.168932022062151e-06, "loss": 0.4662, "step": 7009 }, { "epoch": 1.774234371045305, "grad_norm": 0.17233210802078247, "learning_rate": 9.16654600461854e-06, "loss": 0.4598, "step": 7010 }, { "epoch": 1.774487471526196, "grad_norm": 0.16912797093391418, "learning_rate": 9.164160034955085e-06, "loss": 0.5034, "step": 7011 }, { "epoch": 1.7747405720070868, "grad_norm": 0.16088221967220306, "learning_rate": 9.161774113208557e-06, "loss": 0.4621, "step": 7012 }, { "epoch": 1.7749936724879776, "grad_norm": 0.16965875029563904, "learning_rate": 9.159388239515742e-06, "loss": 0.4902, "step": 7013 }, { "epoch": 1.7752467729688686, "grad_norm": 0.167842835187912, "learning_rate": 9.157002414013412e-06, "loss": 0.4942, "step": 7014 }, { "epoch": 1.7754998734497596, "grad_norm": 0.1685698926448822, "learning_rate": 9.154616636838351e-06, "loss": 0.4754, "step": 7015 }, { "epoch": 1.7757529739306506, "grad_norm": 0.16558191180229187, "learning_rate": 9.152230908127322e-06, "loss": 0.484, "step": 7016 }, { "epoch": 1.7760060744115413, "grad_norm": 0.17744605243206024, "learning_rate": 9.1498452280171e-06, "loss": 0.4787, "step": 7017 }, { "epoch": 1.7762591748924323, "grad_norm": 0.16893842816352844, "learning_rate": 9.147459596644446e-06, "loss": 0.5017, "step": 7018 }, { "epoch": 1.7765122753733231, "grad_norm": 0.16835230588912964, "learning_rate": 9.145074014146125e-06, "loss": 0.4854, "step": 7019 }, { "epoch": 1.7767653758542141, "grad_norm": 0.16095156967639923, "learning_rate": 9.142688480658894e-06, "loss": 0.471, "step": 7020 }, { "epoch": 1.7770184763351051, "grad_norm": 0.17382606863975525, "learning_rate": 9.140302996319516e-06, "loss": 0.474, "step": 7021 }, { "epoch": 1.777271576815996, "grad_norm": 0.16744069755077362, "learning_rate": 9.13791756126475e-06, "loss": 0.4613, "step": 7022 }, { "epoch": 1.777524677296887, "grad_norm": 0.17986859381198883, "learning_rate": 9.135532175631333e-06, "loss": 0.4859, "step": 7023 }, { "epoch": 1.7777777777777777, "grad_norm": 0.16533297300338745, "learning_rate": 9.133146839556025e-06, "loss": 0.4898, "step": 7024 }, { "epoch": 1.7780308782586687, "grad_norm": 0.16495969891548157, "learning_rate": 9.130761553175572e-06, "loss": 0.4882, "step": 7025 }, { "epoch": 1.7782839787395597, "grad_norm": 0.16410301625728607, "learning_rate": 9.128376316626713e-06, "loss": 0.4728, "step": 7026 }, { "epoch": 1.7785370792204507, "grad_norm": 0.16634130477905273, "learning_rate": 9.125991130046187e-06, "loss": 0.4837, "step": 7027 }, { "epoch": 1.7787901797013415, "grad_norm": 0.16434365510940552, "learning_rate": 9.123605993570745e-06, "loss": 0.475, "step": 7028 }, { "epoch": 1.7790432801822322, "grad_norm": 0.1680000275373459, "learning_rate": 9.121220907337105e-06, "loss": 0.4673, "step": 7029 }, { "epoch": 1.7792963806631232, "grad_norm": 0.1674003005027771, "learning_rate": 9.118835871482008e-06, "loss": 0.4619, "step": 7030 }, { "epoch": 1.7795494811440142, "grad_norm": 0.17831020057201385, "learning_rate": 9.116450886142183e-06, "loss": 0.4742, "step": 7031 }, { "epoch": 1.7798025816249052, "grad_norm": 0.17159344255924225, "learning_rate": 9.114065951454353e-06, "loss": 0.5007, "step": 7032 }, { "epoch": 1.780055682105796, "grad_norm": 0.16629333794116974, "learning_rate": 9.111681067555246e-06, "loss": 0.4918, "step": 7033 }, { "epoch": 1.7803087825866868, "grad_norm": 0.1723182052373886, "learning_rate": 9.109296234581577e-06, "loss": 0.4796, "step": 7034 }, { "epoch": 1.7805618830675778, "grad_norm": 0.1646215170621872, "learning_rate": 9.106911452670063e-06, "loss": 0.4757, "step": 7035 }, { "epoch": 1.7808149835484688, "grad_norm": 0.16032730042934418, "learning_rate": 9.104526721957421e-06, "loss": 0.4612, "step": 7036 }, { "epoch": 1.7810680840293598, "grad_norm": 0.16717419028282166, "learning_rate": 9.102142042580364e-06, "loss": 0.4758, "step": 7037 }, { "epoch": 1.7813211845102506, "grad_norm": 0.17210359871387482, "learning_rate": 9.099757414675599e-06, "loss": 0.4664, "step": 7038 }, { "epoch": 1.7815742849911413, "grad_norm": 0.16316628456115723, "learning_rate": 9.097372838379834e-06, "loss": 0.4951, "step": 7039 }, { "epoch": 1.7818273854720323, "grad_norm": 0.1732771098613739, "learning_rate": 9.094988313829768e-06, "loss": 0.4866, "step": 7040 }, { "epoch": 1.7820804859529233, "grad_norm": 0.17820844054222107, "learning_rate": 9.0926038411621e-06, "loss": 0.4869, "step": 7041 }, { "epoch": 1.7823335864338143, "grad_norm": 0.16964390873908997, "learning_rate": 9.09021942051353e-06, "loss": 0.4868, "step": 7042 }, { "epoch": 1.7825866869147051, "grad_norm": 0.1941564679145813, "learning_rate": 9.08783505202075e-06, "loss": 0.4885, "step": 7043 }, { "epoch": 1.782839787395596, "grad_norm": 0.16407938301563263, "learning_rate": 9.085450735820458e-06, "loss": 0.4935, "step": 7044 }, { "epoch": 1.783092887876487, "grad_norm": 0.16720189154148102, "learning_rate": 9.083066472049328e-06, "loss": 0.5019, "step": 7045 }, { "epoch": 1.783345988357378, "grad_norm": 0.173940047621727, "learning_rate": 9.080682260844055e-06, "loss": 0.5133, "step": 7046 }, { "epoch": 1.783599088838269, "grad_norm": 0.16578060388565063, "learning_rate": 9.078298102341316e-06, "loss": 0.4942, "step": 7047 }, { "epoch": 1.7838521893191597, "grad_norm": 0.1631111204624176, "learning_rate": 9.075913996677795e-06, "loss": 0.4812, "step": 7048 }, { "epoch": 1.7841052898000507, "grad_norm": 0.16792747378349304, "learning_rate": 9.073529943990159e-06, "loss": 0.5213, "step": 7049 }, { "epoch": 1.7843583902809415, "grad_norm": 0.16683267056941986, "learning_rate": 9.071145944415094e-06, "loss": 0.4922, "step": 7050 }, { "epoch": 1.7846114907618325, "grad_norm": 0.1699254959821701, "learning_rate": 9.068761998089254e-06, "loss": 0.4643, "step": 7051 }, { "epoch": 1.7848645912427235, "grad_norm": 0.16959483921527863, "learning_rate": 9.066378105149316e-06, "loss": 0.4952, "step": 7052 }, { "epoch": 1.7851176917236142, "grad_norm": 0.1889575719833374, "learning_rate": 9.063994265731942e-06, "loss": 0.5074, "step": 7053 }, { "epoch": 1.7853707922045052, "grad_norm": 0.1743732988834381, "learning_rate": 9.06161047997379e-06, "loss": 0.4843, "step": 7054 }, { "epoch": 1.785623892685396, "grad_norm": 0.16450104117393494, "learning_rate": 9.059226748011518e-06, "loss": 0.504, "step": 7055 }, { "epoch": 1.785876993166287, "grad_norm": 0.16234098374843597, "learning_rate": 9.056843069981785e-06, "loss": 0.4628, "step": 7056 }, { "epoch": 1.786130093647178, "grad_norm": 0.1673133224248886, "learning_rate": 9.054459446021235e-06, "loss": 0.4766, "step": 7057 }, { "epoch": 1.786383194128069, "grad_norm": 0.17104916274547577, "learning_rate": 9.052075876266517e-06, "loss": 0.4942, "step": 7058 }, { "epoch": 1.7866362946089598, "grad_norm": 0.16792425513267517, "learning_rate": 9.049692360854281e-06, "loss": 0.4958, "step": 7059 }, { "epoch": 1.7868893950898506, "grad_norm": 0.17037780582904816, "learning_rate": 9.047308899921165e-06, "loss": 0.4869, "step": 7060 }, { "epoch": 1.7871424955707416, "grad_norm": 0.17294315993785858, "learning_rate": 9.04492549360381e-06, "loss": 0.4824, "step": 7061 }, { "epoch": 1.7873955960516326, "grad_norm": 0.16182255744934082, "learning_rate": 9.042542142038852e-06, "loss": 0.4608, "step": 7062 }, { "epoch": 1.7876486965325236, "grad_norm": 0.16630138456821442, "learning_rate": 9.040158845362919e-06, "loss": 0.4808, "step": 7063 }, { "epoch": 1.7879017970134143, "grad_norm": 0.1685846745967865, "learning_rate": 9.037775603712643e-06, "loss": 0.4714, "step": 7064 }, { "epoch": 1.7881548974943051, "grad_norm": 0.16405776143074036, "learning_rate": 9.035392417224652e-06, "loss": 0.4745, "step": 7065 }, { "epoch": 1.7884079979751961, "grad_norm": 0.17845842242240906, "learning_rate": 9.033009286035567e-06, "loss": 0.4748, "step": 7066 }, { "epoch": 1.7886610984560871, "grad_norm": 0.173936128616333, "learning_rate": 9.030626210282012e-06, "loss": 0.4547, "step": 7067 }, { "epoch": 1.7889141989369781, "grad_norm": 0.17056939005851746, "learning_rate": 9.028243190100596e-06, "loss": 0.5054, "step": 7068 }, { "epoch": 1.789167299417869, "grad_norm": 0.16727840900421143, "learning_rate": 9.025860225627937e-06, "loss": 0.4871, "step": 7069 }, { "epoch": 1.7894203998987597, "grad_norm": 0.1713520884513855, "learning_rate": 9.023477317000647e-06, "loss": 0.4739, "step": 7070 }, { "epoch": 1.7896735003796507, "grad_norm": 0.17109452188014984, "learning_rate": 9.021094464355326e-06, "loss": 0.4861, "step": 7071 }, { "epoch": 1.7899266008605417, "grad_norm": 0.1664499044418335, "learning_rate": 9.01871166782859e-06, "loss": 0.4711, "step": 7072 }, { "epoch": 1.7901797013414327, "grad_norm": 0.16770176589488983, "learning_rate": 9.016328927557026e-06, "loss": 0.4846, "step": 7073 }, { "epoch": 1.7904328018223234, "grad_norm": 0.1679050177335739, "learning_rate": 9.013946243677241e-06, "loss": 0.4798, "step": 7074 }, { "epoch": 1.7906859023032142, "grad_norm": 0.16679927706718445, "learning_rate": 9.011563616325827e-06, "loss": 0.4686, "step": 7075 }, { "epoch": 1.7909390027841052, "grad_norm": 0.1656089872121811, "learning_rate": 9.009181045639372e-06, "loss": 0.4871, "step": 7076 }, { "epoch": 1.7911921032649962, "grad_norm": 0.21851710975170135, "learning_rate": 9.006798531754466e-06, "loss": 0.4784, "step": 7077 }, { "epoch": 1.7914452037458872, "grad_norm": 0.16585250198841095, "learning_rate": 9.004416074807699e-06, "loss": 0.4797, "step": 7078 }, { "epoch": 1.791698304226778, "grad_norm": 0.16638027131557465, "learning_rate": 9.00203367493564e-06, "loss": 0.4937, "step": 7079 }, { "epoch": 1.7919514047076688, "grad_norm": 0.16672734916210175, "learning_rate": 8.999651332274873e-06, "loss": 0.4703, "step": 7080 }, { "epoch": 1.7922045051885598, "grad_norm": 0.16320641338825226, "learning_rate": 8.997269046961976e-06, "loss": 0.4668, "step": 7081 }, { "epoch": 1.7924576056694508, "grad_norm": 0.17133364081382751, "learning_rate": 8.994886819133518e-06, "loss": 0.4912, "step": 7082 }, { "epoch": 1.7927107061503418, "grad_norm": 0.16619545221328735, "learning_rate": 8.992504648926068e-06, "loss": 0.4844, "step": 7083 }, { "epoch": 1.7929638066312326, "grad_norm": 0.17308393120765686, "learning_rate": 8.990122536476185e-06, "loss": 0.5021, "step": 7084 }, { "epoch": 1.7932169071121236, "grad_norm": 0.16815774142742157, "learning_rate": 8.987740481920437e-06, "loss": 0.466, "step": 7085 }, { "epoch": 1.7934700075930143, "grad_norm": 0.19467906653881073, "learning_rate": 8.98535848539538e-06, "loss": 0.4725, "step": 7086 }, { "epoch": 1.7937231080739053, "grad_norm": 0.16423262655735016, "learning_rate": 8.982976547037567e-06, "loss": 0.4788, "step": 7087 }, { "epoch": 1.7939762085547963, "grad_norm": 0.16519945859909058, "learning_rate": 8.980594666983552e-06, "loss": 0.4782, "step": 7088 }, { "epoch": 1.7942293090356871, "grad_norm": 0.16756092011928558, "learning_rate": 8.978212845369885e-06, "loss": 0.4884, "step": 7089 }, { "epoch": 1.7944824095165781, "grad_norm": 0.16233980655670166, "learning_rate": 8.975831082333105e-06, "loss": 0.4705, "step": 7090 }, { "epoch": 1.794735509997469, "grad_norm": 0.16701973974704742, "learning_rate": 8.973449378009758e-06, "loss": 0.4524, "step": 7091 }, { "epoch": 1.79498861047836, "grad_norm": 0.17033658921718597, "learning_rate": 8.971067732536381e-06, "loss": 0.4893, "step": 7092 }, { "epoch": 1.795241710959251, "grad_norm": 0.16848523914813995, "learning_rate": 8.968686146049505e-06, "loss": 0.495, "step": 7093 }, { "epoch": 1.795494811440142, "grad_norm": 0.1759893000125885, "learning_rate": 8.96630461868567e-06, "loss": 0.4779, "step": 7094 }, { "epoch": 1.7957479119210327, "grad_norm": 0.1642276644706726, "learning_rate": 8.963923150581394e-06, "loss": 0.4761, "step": 7095 }, { "epoch": 1.7960010124019234, "grad_norm": 0.16793441772460938, "learning_rate": 8.961541741873208e-06, "loss": 0.4877, "step": 7096 }, { "epoch": 1.7962541128828144, "grad_norm": 0.17865119874477386, "learning_rate": 8.95916039269763e-06, "loss": 0.4745, "step": 7097 }, { "epoch": 1.7965072133637054, "grad_norm": 0.17056193947792053, "learning_rate": 8.95677910319118e-06, "loss": 0.4977, "step": 7098 }, { "epoch": 1.7967603138445964, "grad_norm": 0.16700968146324158, "learning_rate": 8.95439787349037e-06, "loss": 0.497, "step": 7099 }, { "epoch": 1.7970134143254872, "grad_norm": 0.1629302054643631, "learning_rate": 8.952016703731714e-06, "loss": 0.4854, "step": 7100 }, { "epoch": 1.797266514806378, "grad_norm": 0.16926102340221405, "learning_rate": 8.949635594051715e-06, "loss": 0.5022, "step": 7101 }, { "epoch": 1.797519615287269, "grad_norm": 0.17131805419921875, "learning_rate": 8.947254544586878e-06, "loss": 0.4685, "step": 7102 }, { "epoch": 1.79777271576816, "grad_norm": 0.1665675938129425, "learning_rate": 8.944873555473708e-06, "loss": 0.4756, "step": 7103 }, { "epoch": 1.798025816249051, "grad_norm": 0.1712389886379242, "learning_rate": 8.942492626848697e-06, "loss": 0.5017, "step": 7104 }, { "epoch": 1.7982789167299418, "grad_norm": 0.16281016170978546, "learning_rate": 8.940111758848345e-06, "loss": 0.4427, "step": 7105 }, { "epoch": 1.7985320172108326, "grad_norm": 0.16611382365226746, "learning_rate": 8.937730951609132e-06, "loss": 0.4731, "step": 7106 }, { "epoch": 1.7987851176917236, "grad_norm": 0.1694909781217575, "learning_rate": 8.935350205267553e-06, "loss": 0.4731, "step": 7107 }, { "epoch": 1.7990382181726146, "grad_norm": 0.1676475703716278, "learning_rate": 8.932969519960088e-06, "loss": 0.4754, "step": 7108 }, { "epoch": 1.7992913186535056, "grad_norm": 0.1679893583059311, "learning_rate": 8.930588895823213e-06, "loss": 0.4772, "step": 7109 }, { "epoch": 1.7995444191343963, "grad_norm": 0.16935577988624573, "learning_rate": 8.928208332993413e-06, "loss": 0.5017, "step": 7110 }, { "epoch": 1.7997975196152871, "grad_norm": 0.17048992216587067, "learning_rate": 8.925827831607155e-06, "loss": 0.4868, "step": 7111 }, { "epoch": 1.8000506200961781, "grad_norm": 0.16662631928920746, "learning_rate": 8.923447391800907e-06, "loss": 0.4887, "step": 7112 }, { "epoch": 1.800303720577069, "grad_norm": 0.16964873671531677, "learning_rate": 8.921067013711139e-06, "loss": 0.4683, "step": 7113 }, { "epoch": 1.80055682105796, "grad_norm": 0.16489097476005554, "learning_rate": 8.91868669747431e-06, "loss": 0.4585, "step": 7114 }, { "epoch": 1.8008099215388509, "grad_norm": 0.16195321083068848, "learning_rate": 8.916306443226874e-06, "loss": 0.4771, "step": 7115 }, { "epoch": 1.8010630220197419, "grad_norm": 0.16826103627681732, "learning_rate": 8.913926251105295e-06, "loss": 0.5224, "step": 7116 }, { "epoch": 1.8013161225006327, "grad_norm": 0.1696866750717163, "learning_rate": 8.911546121246024e-06, "loss": 0.4799, "step": 7117 }, { "epoch": 1.8015692229815237, "grad_norm": 0.16552655398845673, "learning_rate": 8.909166053785499e-06, "loss": 0.4956, "step": 7118 }, { "epoch": 1.8018223234624147, "grad_norm": 0.17347510159015656, "learning_rate": 8.906786048860171e-06, "loss": 0.5312, "step": 7119 }, { "epoch": 1.8020754239433054, "grad_norm": 0.16995598375797272, "learning_rate": 8.904406106606483e-06, "loss": 0.48, "step": 7120 }, { "epoch": 1.8023285244241964, "grad_norm": 0.16852964460849762, "learning_rate": 8.902026227160865e-06, "loss": 0.4917, "step": 7121 }, { "epoch": 1.8025816249050872, "grad_norm": 0.17176294326782227, "learning_rate": 8.89964641065976e-06, "loss": 0.5054, "step": 7122 }, { "epoch": 1.8028347253859782, "grad_norm": 0.17005310952663422, "learning_rate": 8.897266657239587e-06, "loss": 0.4882, "step": 7123 }, { "epoch": 1.8030878258668692, "grad_norm": 0.16482532024383545, "learning_rate": 8.894886967036777e-06, "loss": 0.492, "step": 7124 }, { "epoch": 1.8033409263477602, "grad_norm": 0.16217057406902313, "learning_rate": 8.892507340187755e-06, "loss": 0.4852, "step": 7125 }, { "epoch": 1.803594026828651, "grad_norm": 0.17002063989639282, "learning_rate": 8.890127776828936e-06, "loss": 0.4795, "step": 7126 }, { "epoch": 1.8038471273095418, "grad_norm": 0.16370287537574768, "learning_rate": 8.88774827709674e-06, "loss": 0.4493, "step": 7127 }, { "epoch": 1.8041002277904328, "grad_norm": 0.16013486683368683, "learning_rate": 8.885368841127576e-06, "loss": 0.4617, "step": 7128 }, { "epoch": 1.8043533282713238, "grad_norm": 0.1681145280599594, "learning_rate": 8.882989469057852e-06, "loss": 0.4625, "step": 7129 }, { "epoch": 1.8046064287522148, "grad_norm": 0.167418971657753, "learning_rate": 8.88061016102397e-06, "loss": 0.4859, "step": 7130 }, { "epoch": 1.8048595292331056, "grad_norm": 0.16534151136875153, "learning_rate": 8.878230917162332e-06, "loss": 0.4739, "step": 7131 }, { "epoch": 1.8051126297139963, "grad_norm": 0.16589811444282532, "learning_rate": 8.875851737609338e-06, "loss": 0.4872, "step": 7132 }, { "epoch": 1.8053657301948873, "grad_norm": 0.16688157618045807, "learning_rate": 8.873472622501382e-06, "loss": 0.4605, "step": 7133 }, { "epoch": 1.8056188306757783, "grad_norm": 0.16947586834430695, "learning_rate": 8.871093571974847e-06, "loss": 0.4732, "step": 7134 }, { "epoch": 1.8058719311566693, "grad_norm": 0.16935698688030243, "learning_rate": 8.868714586166124e-06, "loss": 0.4914, "step": 7135 }, { "epoch": 1.80612503163756, "grad_norm": 0.17184101045131683, "learning_rate": 8.866335665211593e-06, "loss": 0.5003, "step": 7136 }, { "epoch": 1.8063781321184509, "grad_norm": 0.1677810102701187, "learning_rate": 8.863956809247634e-06, "loss": 0.4766, "step": 7137 }, { "epoch": 1.8066312325993419, "grad_norm": 0.4090183973312378, "learning_rate": 8.86157801841062e-06, "loss": 0.4666, "step": 7138 }, { "epoch": 1.8068843330802329, "grad_norm": 0.1621973216533661, "learning_rate": 8.85919929283693e-06, "loss": 0.4684, "step": 7139 }, { "epoch": 1.8071374335611239, "grad_norm": 0.17448416352272034, "learning_rate": 8.856820632662918e-06, "loss": 0.4781, "step": 7140 }, { "epoch": 1.8073905340420147, "grad_norm": 0.170873761177063, "learning_rate": 8.854442038024957e-06, "loss": 0.4858, "step": 7141 }, { "epoch": 1.8076436345229054, "grad_norm": 0.1646825671195984, "learning_rate": 8.852063509059402e-06, "loss": 0.4702, "step": 7142 }, { "epoch": 1.8078967350037964, "grad_norm": 0.17109772562980652, "learning_rate": 8.849685045902611e-06, "loss": 0.4736, "step": 7143 }, { "epoch": 1.8081498354846874, "grad_norm": 0.17118734121322632, "learning_rate": 8.84730664869094e-06, "loss": 0.482, "step": 7144 }, { "epoch": 1.8084029359655784, "grad_norm": 0.16705068945884705, "learning_rate": 8.84492831756073e-06, "loss": 0.4868, "step": 7145 }, { "epoch": 1.8086560364464692, "grad_norm": 0.17040404677391052, "learning_rate": 8.84255005264833e-06, "loss": 0.5001, "step": 7146 }, { "epoch": 1.8089091369273602, "grad_norm": 0.1679455041885376, "learning_rate": 8.840171854090078e-06, "loss": 0.4835, "step": 7147 }, { "epoch": 1.809162237408251, "grad_norm": 0.16773565113544464, "learning_rate": 8.837793722022317e-06, "loss": 0.4736, "step": 7148 }, { "epoch": 1.809415337889142, "grad_norm": 0.16642512381076813, "learning_rate": 8.835415656581375e-06, "loss": 0.4836, "step": 7149 }, { "epoch": 1.809668438370033, "grad_norm": 0.16902215778827667, "learning_rate": 8.833037657903585e-06, "loss": 0.5081, "step": 7150 }, { "epoch": 1.8099215388509238, "grad_norm": 0.17754876613616943, "learning_rate": 8.830659726125268e-06, "loss": 0.4785, "step": 7151 }, { "epoch": 1.8101746393318148, "grad_norm": 0.16650840640068054, "learning_rate": 8.82828186138275e-06, "loss": 0.4886, "step": 7152 }, { "epoch": 1.8104277398127055, "grad_norm": 0.16342855989933014, "learning_rate": 8.825904063812346e-06, "loss": 0.4599, "step": 7153 }, { "epoch": 1.8106808402935965, "grad_norm": 0.16374650597572327, "learning_rate": 8.823526333550372e-06, "loss": 0.4611, "step": 7154 }, { "epoch": 1.8109339407744875, "grad_norm": 0.1695319265127182, "learning_rate": 8.82114867073314e-06, "loss": 0.4693, "step": 7155 }, { "epoch": 1.8111870412553785, "grad_norm": 0.17356136441230774, "learning_rate": 8.818771075496953e-06, "loss": 0.4992, "step": 7156 }, { "epoch": 1.8114401417362693, "grad_norm": 0.16749443113803864, "learning_rate": 8.816393547978113e-06, "loss": 0.4871, "step": 7157 }, { "epoch": 1.81169324221716, "grad_norm": 0.17425629496574402, "learning_rate": 8.81401608831292e-06, "loss": 0.4902, "step": 7158 }, { "epoch": 1.811946342698051, "grad_norm": 0.17533639073371887, "learning_rate": 8.811638696637668e-06, "loss": 0.4877, "step": 7159 }, { "epoch": 1.812199443178942, "grad_norm": 0.16326135396957397, "learning_rate": 8.80926137308865e-06, "loss": 0.4735, "step": 7160 }, { "epoch": 1.812452543659833, "grad_norm": 0.17205817997455597, "learning_rate": 8.806884117802157e-06, "loss": 0.4968, "step": 7161 }, { "epoch": 1.8127056441407239, "grad_norm": 0.16979466378688812, "learning_rate": 8.804506930914459e-06, "loss": 0.497, "step": 7162 }, { "epoch": 1.8129587446216147, "grad_norm": 0.17228294909000397, "learning_rate": 8.802129812561846e-06, "loss": 0.483, "step": 7163 }, { "epoch": 1.8132118451025057, "grad_norm": 0.1665838360786438, "learning_rate": 8.799752762880589e-06, "loss": 0.4888, "step": 7164 }, { "epoch": 1.8134649455833967, "grad_norm": 0.16238079965114594, "learning_rate": 8.797375782006961e-06, "loss": 0.4917, "step": 7165 }, { "epoch": 1.8137180460642877, "grad_norm": 0.16722612082958221, "learning_rate": 8.79499887007723e-06, "loss": 0.4691, "step": 7166 }, { "epoch": 1.8139711465451784, "grad_norm": 0.16381539404392242, "learning_rate": 8.792622027227655e-06, "loss": 0.4825, "step": 7167 }, { "epoch": 1.8142242470260692, "grad_norm": 0.16971702873706818, "learning_rate": 8.790245253594496e-06, "loss": 0.4848, "step": 7168 }, { "epoch": 1.8144773475069602, "grad_norm": 0.18669746816158295, "learning_rate": 8.787868549314012e-06, "loss": 0.4856, "step": 7169 }, { "epoch": 1.8147304479878512, "grad_norm": 0.16361920535564423, "learning_rate": 8.785491914522455e-06, "loss": 0.4743, "step": 7170 }, { "epoch": 1.8149835484687422, "grad_norm": 0.1797214299440384, "learning_rate": 8.783115349356068e-06, "loss": 0.4869, "step": 7171 }, { "epoch": 1.815236648949633, "grad_norm": 0.17165474593639374, "learning_rate": 8.7807388539511e-06, "loss": 0.4616, "step": 7172 }, { "epoch": 1.8154897494305238, "grad_norm": 0.16584494709968567, "learning_rate": 8.778362428443784e-06, "loss": 0.4836, "step": 7173 }, { "epoch": 1.8157428499114148, "grad_norm": 0.16368629038333893, "learning_rate": 8.77598607297036e-06, "loss": 0.4603, "step": 7174 }, { "epoch": 1.8159959503923058, "grad_norm": 0.1665743738412857, "learning_rate": 8.773609787667057e-06, "loss": 0.4746, "step": 7175 }, { "epoch": 1.8162490508731968, "grad_norm": 0.17082396149635315, "learning_rate": 8.771233572670104e-06, "loss": 0.5048, "step": 7176 }, { "epoch": 1.8165021513540875, "grad_norm": 0.16417402029037476, "learning_rate": 8.768857428115723e-06, "loss": 0.4728, "step": 7177 }, { "epoch": 1.8167552518349783, "grad_norm": 0.16536110639572144, "learning_rate": 8.766481354140138e-06, "loss": 0.4712, "step": 7178 }, { "epoch": 1.8170083523158693, "grad_norm": 0.17106187343597412, "learning_rate": 8.764105350879556e-06, "loss": 0.5069, "step": 7179 }, { "epoch": 1.8172614527967603, "grad_norm": 0.19047637283802032, "learning_rate": 8.761729418470192e-06, "loss": 0.4711, "step": 7180 }, { "epoch": 1.8175145532776513, "grad_norm": 0.1629355102777481, "learning_rate": 8.759353557048254e-06, "loss": 0.4748, "step": 7181 }, { "epoch": 1.817767653758542, "grad_norm": 0.1736607402563095, "learning_rate": 8.756977766749946e-06, "loss": 0.4797, "step": 7182 }, { "epoch": 1.818020754239433, "grad_norm": 0.23817604780197144, "learning_rate": 8.754602047711472e-06, "loss": 0.5126, "step": 7183 }, { "epoch": 1.8182738547203239, "grad_norm": 0.1664394587278366, "learning_rate": 8.752226400069013e-06, "loss": 0.4889, "step": 7184 }, { "epoch": 1.8185269552012149, "grad_norm": 0.17919069528579712, "learning_rate": 8.74985082395877e-06, "loss": 0.501, "step": 7185 }, { "epoch": 1.8187800556821059, "grad_norm": 0.1687518060207367, "learning_rate": 8.747475319516925e-06, "loss": 0.4666, "step": 7186 }, { "epoch": 1.8190331561629967, "grad_norm": 0.16790249943733215, "learning_rate": 8.745099886879667e-06, "loss": 0.488, "step": 7187 }, { "epoch": 1.8192862566438877, "grad_norm": 0.1638140231370926, "learning_rate": 8.742724526183166e-06, "loss": 0.4804, "step": 7188 }, { "epoch": 1.8195393571247784, "grad_norm": 0.1678251028060913, "learning_rate": 8.740349237563608e-06, "loss": 0.486, "step": 7189 }, { "epoch": 1.8197924576056694, "grad_norm": 0.16731011867523193, "learning_rate": 8.737974021157149e-06, "loss": 0.4939, "step": 7190 }, { "epoch": 1.8200455580865604, "grad_norm": 0.17091919481754303, "learning_rate": 8.735598877099965e-06, "loss": 0.502, "step": 7191 }, { "epoch": 1.8202986585674514, "grad_norm": 0.16906003654003143, "learning_rate": 8.733223805528215e-06, "loss": 0.5042, "step": 7192 }, { "epoch": 1.8205517590483422, "grad_norm": 0.17247402667999268, "learning_rate": 8.730848806578058e-06, "loss": 0.4849, "step": 7193 }, { "epoch": 1.820804859529233, "grad_norm": 0.17950056493282318, "learning_rate": 8.728473880385648e-06, "loss": 0.4759, "step": 7194 }, { "epoch": 1.821057960010124, "grad_norm": 0.16923324763774872, "learning_rate": 8.726099027087131e-06, "loss": 0.4752, "step": 7195 }, { "epoch": 1.821311060491015, "grad_norm": 0.1744595468044281, "learning_rate": 8.723724246818655e-06, "loss": 0.4832, "step": 7196 }, { "epoch": 1.821564160971906, "grad_norm": 0.16630634665489197, "learning_rate": 8.72134953971636e-06, "loss": 0.4734, "step": 7197 }, { "epoch": 1.8218172614527968, "grad_norm": 0.16169604659080505, "learning_rate": 8.718974905916384e-06, "loss": 0.4678, "step": 7198 }, { "epoch": 1.8220703619336875, "grad_norm": 0.1642029583454132, "learning_rate": 8.716600345554859e-06, "loss": 0.4888, "step": 7199 }, { "epoch": 1.8223234624145785, "grad_norm": 0.17327143251895905, "learning_rate": 8.714225858767915e-06, "loss": 0.4868, "step": 7200 }, { "epoch": 1.8225765628954695, "grad_norm": 0.16800250113010406, "learning_rate": 8.711851445691674e-06, "loss": 0.4522, "step": 7201 }, { "epoch": 1.8228296633763605, "grad_norm": 0.1667100340127945, "learning_rate": 8.709477106462259e-06, "loss": 0.5022, "step": 7202 }, { "epoch": 1.8230827638572513, "grad_norm": 0.1681675761938095, "learning_rate": 8.70710284121578e-06, "loss": 0.4736, "step": 7203 }, { "epoch": 1.823335864338142, "grad_norm": 0.16937273740768433, "learning_rate": 8.704728650088354e-06, "loss": 0.4729, "step": 7204 }, { "epoch": 1.823588964819033, "grad_norm": 0.16756105422973633, "learning_rate": 8.702354533216093e-06, "loss": 0.4722, "step": 7205 }, { "epoch": 1.823842065299924, "grad_norm": 0.17045949399471283, "learning_rate": 8.699980490735086e-06, "loss": 0.4828, "step": 7206 }, { "epoch": 1.824095165780815, "grad_norm": 0.16832582652568817, "learning_rate": 8.697606522781443e-06, "loss": 0.504, "step": 7207 }, { "epoch": 1.8243482662617059, "grad_norm": 0.19510367512702942, "learning_rate": 8.695232629491254e-06, "loss": 0.4821, "step": 7208 }, { "epoch": 1.8246013667425967, "grad_norm": 0.168720543384552, "learning_rate": 8.692858811000612e-06, "loss": 0.4528, "step": 7209 }, { "epoch": 1.8248544672234877, "grad_norm": 0.16153566539287567, "learning_rate": 8.6904850674456e-06, "loss": 0.4762, "step": 7210 }, { "epoch": 1.8251075677043787, "grad_norm": 0.16327178478240967, "learning_rate": 8.688111398962307e-06, "loss": 0.4485, "step": 7211 }, { "epoch": 1.8253606681852697, "grad_norm": 0.17900285124778748, "learning_rate": 8.685737805686798e-06, "loss": 0.4818, "step": 7212 }, { "epoch": 1.8256137686661604, "grad_norm": 0.17660273611545563, "learning_rate": 8.683364287755153e-06, "loss": 0.5211, "step": 7213 }, { "epoch": 1.8258668691470514, "grad_norm": 0.1704120934009552, "learning_rate": 8.680990845303443e-06, "loss": 0.4767, "step": 7214 }, { "epoch": 1.8261199696279422, "grad_norm": 0.17177322506904602, "learning_rate": 8.67861747846773e-06, "loss": 0.4874, "step": 7215 }, { "epoch": 1.8263730701088332, "grad_norm": 0.17163151502609253, "learning_rate": 8.676244187384075e-06, "loss": 0.4956, "step": 7216 }, { "epoch": 1.8266261705897242, "grad_norm": 0.17782814800739288, "learning_rate": 8.673870972188531e-06, "loss": 0.4788, "step": 7217 }, { "epoch": 1.826879271070615, "grad_norm": 0.17235015332698822, "learning_rate": 8.67149783301715e-06, "loss": 0.4862, "step": 7218 }, { "epoch": 1.827132371551506, "grad_norm": 0.17209725081920624, "learning_rate": 8.66912477000598e-06, "loss": 0.4718, "step": 7219 }, { "epoch": 1.8273854720323968, "grad_norm": 0.16636143624782562, "learning_rate": 8.666751783291066e-06, "loss": 0.4694, "step": 7220 }, { "epoch": 1.8276385725132878, "grad_norm": 0.17050060629844666, "learning_rate": 8.664378873008442e-06, "loss": 0.4813, "step": 7221 }, { "epoch": 1.8278916729941788, "grad_norm": 0.16555699706077576, "learning_rate": 8.662006039294147e-06, "loss": 0.4635, "step": 7222 }, { "epoch": 1.8281447734750698, "grad_norm": 0.16890889406204224, "learning_rate": 8.659633282284207e-06, "loss": 0.4763, "step": 7223 }, { "epoch": 1.8283978739559605, "grad_norm": 0.1719082146883011, "learning_rate": 8.657260602114646e-06, "loss": 0.4681, "step": 7224 }, { "epoch": 1.8286509744368513, "grad_norm": 0.1745995581150055, "learning_rate": 8.654887998921484e-06, "loss": 0.5014, "step": 7225 }, { "epoch": 1.8289040749177423, "grad_norm": 0.16672901809215546, "learning_rate": 8.652515472840744e-06, "loss": 0.5089, "step": 7226 }, { "epoch": 1.8291571753986333, "grad_norm": 0.16738271713256836, "learning_rate": 8.650143024008435e-06, "loss": 0.4608, "step": 7227 }, { "epoch": 1.8294102758795243, "grad_norm": 0.17850981652736664, "learning_rate": 8.647770652560559e-06, "loss": 0.4783, "step": 7228 }, { "epoch": 1.829663376360415, "grad_norm": 0.16211619973182678, "learning_rate": 8.645398358633123e-06, "loss": 0.4971, "step": 7229 }, { "epoch": 1.8299164768413059, "grad_norm": 0.172844797372818, "learning_rate": 8.643026142362128e-06, "loss": 0.4776, "step": 7230 }, { "epoch": 1.8301695773221969, "grad_norm": 0.16679085791110992, "learning_rate": 8.640654003883566e-06, "loss": 0.4766, "step": 7231 }, { "epoch": 1.8304226778030879, "grad_norm": 0.17808124423027039, "learning_rate": 8.638281943333422e-06, "loss": 0.4762, "step": 7232 }, { "epoch": 1.8306757782839789, "grad_norm": 0.16853244602680206, "learning_rate": 8.635909960847693e-06, "loss": 0.4661, "step": 7233 }, { "epoch": 1.8309288787648696, "grad_norm": 0.17415280640125275, "learning_rate": 8.633538056562346e-06, "loss": 0.4728, "step": 7234 }, { "epoch": 1.8311819792457604, "grad_norm": 0.16796250641345978, "learning_rate": 8.631166230613366e-06, "loss": 0.4899, "step": 7235 }, { "epoch": 1.8314350797266514, "grad_norm": 0.17168600857257843, "learning_rate": 8.628794483136722e-06, "loss": 0.4782, "step": 7236 }, { "epoch": 1.8316881802075424, "grad_norm": 0.17053189873695374, "learning_rate": 8.626422814268382e-06, "loss": 0.4613, "step": 7237 }, { "epoch": 1.8319412806884334, "grad_norm": 0.1740400791168213, "learning_rate": 8.624051224144308e-06, "loss": 0.4712, "step": 7238 }, { "epoch": 1.8321943811693242, "grad_norm": 0.17474724352359772, "learning_rate": 8.621679712900462e-06, "loss": 0.4691, "step": 7239 }, { "epoch": 1.832447481650215, "grad_norm": 0.16840998828411102, "learning_rate": 8.61930828067279e-06, "loss": 0.4772, "step": 7240 }, { "epoch": 1.832700582131106, "grad_norm": 0.16817189753055573, "learning_rate": 8.616936927597245e-06, "loss": 0.4553, "step": 7241 }, { "epoch": 1.832953682611997, "grad_norm": 0.16883991658687592, "learning_rate": 8.614565653809775e-06, "loss": 0.5013, "step": 7242 }, { "epoch": 1.833206783092888, "grad_norm": 0.1839446872472763, "learning_rate": 8.612194459446314e-06, "loss": 0.4953, "step": 7243 }, { "epoch": 1.8334598835737788, "grad_norm": 0.16985778510570526, "learning_rate": 8.609823344642807e-06, "loss": 0.4793, "step": 7244 }, { "epoch": 1.8337129840546698, "grad_norm": 0.17215128242969513, "learning_rate": 8.607452309535174e-06, "loss": 0.4594, "step": 7245 }, { "epoch": 1.8339660845355605, "grad_norm": 0.16981396079063416, "learning_rate": 8.605081354259346e-06, "loss": 0.4522, "step": 7246 }, { "epoch": 1.8342191850164515, "grad_norm": 0.1662265658378601, "learning_rate": 8.602710478951245e-06, "loss": 0.4846, "step": 7247 }, { "epoch": 1.8344722854973425, "grad_norm": 0.16640666127204895, "learning_rate": 8.600339683746787e-06, "loss": 0.4908, "step": 7248 }, { "epoch": 1.8347253859782333, "grad_norm": 0.17320498824119568, "learning_rate": 8.597968968781887e-06, "loss": 0.5069, "step": 7249 }, { "epoch": 1.8349784864591243, "grad_norm": 0.16716943681240082, "learning_rate": 8.595598334192456e-06, "loss": 0.4888, "step": 7250 }, { "epoch": 1.835231586940015, "grad_norm": 0.16542191803455353, "learning_rate": 8.59322778011439e-06, "loss": 0.4688, "step": 7251 }, { "epoch": 1.835484687420906, "grad_norm": 0.17316804826259613, "learning_rate": 8.590857306683591e-06, "loss": 0.4552, "step": 7252 }, { "epoch": 1.835737787901797, "grad_norm": 0.16994966566562653, "learning_rate": 8.588486914035955e-06, "loss": 0.476, "step": 7253 }, { "epoch": 1.835990888382688, "grad_norm": 0.17197690904140472, "learning_rate": 8.586116602307366e-06, "loss": 0.5103, "step": 7254 }, { "epoch": 1.8362439888635789, "grad_norm": 0.16419757902622223, "learning_rate": 8.583746371633722e-06, "loss": 0.4621, "step": 7255 }, { "epoch": 1.8364970893444696, "grad_norm": 0.1676648110151291, "learning_rate": 8.581376222150884e-06, "loss": 0.499, "step": 7256 }, { "epoch": 1.8367501898253606, "grad_norm": 0.16912142932415009, "learning_rate": 8.579006153994742e-06, "loss": 0.5049, "step": 7257 }, { "epoch": 1.8370032903062516, "grad_norm": 0.1678188592195511, "learning_rate": 8.576636167301164e-06, "loss": 0.4548, "step": 7258 }, { "epoch": 1.8372563907871426, "grad_norm": 0.16902737319469452, "learning_rate": 8.574266262206015e-06, "loss": 0.4947, "step": 7259 }, { "epoch": 1.8375094912680334, "grad_norm": 0.17290730774402618, "learning_rate": 8.571896438845155e-06, "loss": 0.4891, "step": 7260 }, { "epoch": 1.8377625917489242, "grad_norm": 0.16047313809394836, "learning_rate": 8.569526697354447e-06, "loss": 0.4866, "step": 7261 }, { "epoch": 1.8380156922298152, "grad_norm": 0.16486024856567383, "learning_rate": 8.567157037869736e-06, "loss": 0.4984, "step": 7262 }, { "epoch": 1.8382687927107062, "grad_norm": 0.17097905278205872, "learning_rate": 8.564787460526871e-06, "loss": 0.468, "step": 7263 }, { "epoch": 1.8385218931915972, "grad_norm": 0.17202594876289368, "learning_rate": 8.562417965461698e-06, "loss": 0.4774, "step": 7264 }, { "epoch": 1.838774993672488, "grad_norm": 0.16857367753982544, "learning_rate": 8.560048552810053e-06, "loss": 0.46, "step": 7265 }, { "epoch": 1.8390280941533788, "grad_norm": 0.16494357585906982, "learning_rate": 8.557679222707773e-06, "loss": 0.4673, "step": 7266 }, { "epoch": 1.8392811946342698, "grad_norm": 0.16939778625965118, "learning_rate": 8.555309975290682e-06, "loss": 0.4689, "step": 7267 }, { "epoch": 1.8395342951151608, "grad_norm": 0.1941271722316742, "learning_rate": 8.552940810694604e-06, "loss": 0.5026, "step": 7268 }, { "epoch": 1.8397873955960518, "grad_norm": 0.16597236692905426, "learning_rate": 8.55057172905536e-06, "loss": 0.4719, "step": 7269 }, { "epoch": 1.8400404960769425, "grad_norm": 0.18198348581790924, "learning_rate": 8.548202730508764e-06, "loss": 0.4784, "step": 7270 }, { "epoch": 1.8402935965578333, "grad_norm": 0.18271508812904358, "learning_rate": 8.545833815190628e-06, "loss": 0.4967, "step": 7271 }, { "epoch": 1.8405466970387243, "grad_norm": 0.17448918521404266, "learning_rate": 8.543464983236755e-06, "loss": 0.4967, "step": 7272 }, { "epoch": 1.8407997975196153, "grad_norm": 0.16371208429336548, "learning_rate": 8.541096234782943e-06, "loss": 0.4599, "step": 7273 }, { "epoch": 1.8410528980005063, "grad_norm": 0.17846576869487762, "learning_rate": 8.538727569964988e-06, "loss": 0.4815, "step": 7274 }, { "epoch": 1.841305998481397, "grad_norm": 0.18889407813549042, "learning_rate": 8.536358988918684e-06, "loss": 0.4878, "step": 7275 }, { "epoch": 1.8415590989622879, "grad_norm": 0.17496833205223083, "learning_rate": 8.533990491779812e-06, "loss": 0.4587, "step": 7276 }, { "epoch": 1.8418121994431789, "grad_norm": 0.1680624634027481, "learning_rate": 8.53162207868416e-06, "loss": 0.5085, "step": 7277 }, { "epoch": 1.8420652999240699, "grad_norm": 0.17159336805343628, "learning_rate": 8.529253749767498e-06, "loss": 0.4735, "step": 7278 }, { "epoch": 1.8423184004049609, "grad_norm": 0.17105010151863098, "learning_rate": 8.526885505165595e-06, "loss": 0.4912, "step": 7279 }, { "epoch": 1.8425715008858516, "grad_norm": 0.17554594576358795, "learning_rate": 8.524517345014223e-06, "loss": 0.4832, "step": 7280 }, { "epoch": 1.8428246013667426, "grad_norm": 0.16407592594623566, "learning_rate": 8.522149269449141e-06, "loss": 0.4831, "step": 7281 }, { "epoch": 1.8430777018476334, "grad_norm": 0.1785849630832672, "learning_rate": 8.519781278606108e-06, "loss": 0.4729, "step": 7282 }, { "epoch": 1.8433308023285244, "grad_norm": 0.19690574705600739, "learning_rate": 8.517413372620876e-06, "loss": 0.4921, "step": 7283 }, { "epoch": 1.8435839028094154, "grad_norm": 0.1712683141231537, "learning_rate": 8.51504555162919e-06, "loss": 0.4864, "step": 7284 }, { "epoch": 1.8438370032903062, "grad_norm": 0.1670786589384079, "learning_rate": 8.512677815766787e-06, "loss": 0.4654, "step": 7285 }, { "epoch": 1.8440901037711972, "grad_norm": 0.16777099668979645, "learning_rate": 8.510310165169415e-06, "loss": 0.471, "step": 7286 }, { "epoch": 1.844343204252088, "grad_norm": 0.16625896096229553, "learning_rate": 8.5079425999728e-06, "loss": 0.4904, "step": 7287 }, { "epoch": 1.844596304732979, "grad_norm": 0.16708865761756897, "learning_rate": 8.505575120312672e-06, "loss": 0.4958, "step": 7288 }, { "epoch": 1.84484940521387, "grad_norm": 0.16933731734752655, "learning_rate": 8.503207726324751e-06, "loss": 0.482, "step": 7289 }, { "epoch": 1.845102505694761, "grad_norm": 0.17501449584960938, "learning_rate": 8.500840418144756e-06, "loss": 0.4893, "step": 7290 }, { "epoch": 1.8453556061756518, "grad_norm": 0.16789381206035614, "learning_rate": 8.4984731959084e-06, "loss": 0.4766, "step": 7291 }, { "epoch": 1.8456087066565425, "grad_norm": 0.16886219382286072, "learning_rate": 8.496106059751388e-06, "loss": 0.5025, "step": 7292 }, { "epoch": 1.8458618071374335, "grad_norm": 0.17361664772033691, "learning_rate": 8.493739009809427e-06, "loss": 0.4926, "step": 7293 }, { "epoch": 1.8461149076183245, "grad_norm": 0.1693502962589264, "learning_rate": 8.491372046218217e-06, "loss": 0.476, "step": 7294 }, { "epoch": 1.8463680080992155, "grad_norm": 0.16597245633602142, "learning_rate": 8.489005169113445e-06, "loss": 0.4682, "step": 7295 }, { "epoch": 1.8466211085801063, "grad_norm": 0.17303530871868134, "learning_rate": 8.4866383786308e-06, "loss": 0.5214, "step": 7296 }, { "epoch": 1.846874209060997, "grad_norm": 0.16847838461399078, "learning_rate": 8.484271674905968e-06, "loss": 0.4765, "step": 7297 }, { "epoch": 1.847127309541888, "grad_norm": 0.16755865514278412, "learning_rate": 8.481905058074625e-06, "loss": 0.4652, "step": 7298 }, { "epoch": 1.847380410022779, "grad_norm": 0.6274901032447815, "learning_rate": 8.479538528272448e-06, "loss": 0.4938, "step": 7299 }, { "epoch": 1.84763351050367, "grad_norm": 0.16507656872272491, "learning_rate": 8.4771720856351e-06, "loss": 0.4689, "step": 7300 }, { "epoch": 1.8478866109845609, "grad_norm": 0.1648823320865631, "learning_rate": 8.474805730298242e-06, "loss": 0.4624, "step": 7301 }, { "epoch": 1.8481397114654516, "grad_norm": 0.16854700446128845, "learning_rate": 8.47243946239754e-06, "loss": 0.4749, "step": 7302 }, { "epoch": 1.8483928119463426, "grad_norm": 0.17132091522216797, "learning_rate": 8.470073282068641e-06, "loss": 0.4529, "step": 7303 }, { "epoch": 1.8486459124272336, "grad_norm": 0.16830377280712128, "learning_rate": 8.467707189447197e-06, "loss": 0.5046, "step": 7304 }, { "epoch": 1.8488990129081246, "grad_norm": 0.17675139009952545, "learning_rate": 8.465341184668851e-06, "loss": 0.5166, "step": 7305 }, { "epoch": 1.8491521133890154, "grad_norm": 0.16436350345611572, "learning_rate": 8.462975267869238e-06, "loss": 0.4654, "step": 7306 }, { "epoch": 1.8494052138699062, "grad_norm": 0.1623087227344513, "learning_rate": 8.46060943918399e-06, "loss": 0.4809, "step": 7307 }, { "epoch": 1.8496583143507972, "grad_norm": 0.1685841828584671, "learning_rate": 8.458243698748741e-06, "loss": 0.5023, "step": 7308 }, { "epoch": 1.8499114148316882, "grad_norm": 0.1652073860168457, "learning_rate": 8.455878046699108e-06, "loss": 0.4813, "step": 7309 }, { "epoch": 1.8501645153125792, "grad_norm": 0.17059311270713806, "learning_rate": 8.453512483170715e-06, "loss": 0.4823, "step": 7310 }, { "epoch": 1.85041761579347, "grad_norm": 0.17199496924877167, "learning_rate": 8.45114700829917e-06, "loss": 0.4535, "step": 7311 }, { "epoch": 1.850670716274361, "grad_norm": 0.1717611849308014, "learning_rate": 8.448781622220079e-06, "loss": 0.461, "step": 7312 }, { "epoch": 1.8509238167552517, "grad_norm": 0.16532833874225616, "learning_rate": 8.446416325069048e-06, "loss": 0.4712, "step": 7313 }, { "epoch": 1.8511769172361427, "grad_norm": 0.1629447042942047, "learning_rate": 8.444051116981669e-06, "loss": 0.4636, "step": 7314 }, { "epoch": 1.8514300177170337, "grad_norm": 0.18682603538036346, "learning_rate": 8.441685998093543e-06, "loss": 0.5224, "step": 7315 }, { "epoch": 1.8516831181979245, "grad_norm": 0.16913758218288422, "learning_rate": 8.439320968540256e-06, "loss": 0.4698, "step": 7316 }, { "epoch": 1.8519362186788155, "grad_norm": 0.17152082920074463, "learning_rate": 8.436956028457385e-06, "loss": 0.4793, "step": 7317 }, { "epoch": 1.8521893191597063, "grad_norm": 0.16527120769023895, "learning_rate": 8.434591177980509e-06, "loss": 0.483, "step": 7318 }, { "epoch": 1.8524424196405973, "grad_norm": 0.16684779524803162, "learning_rate": 8.432226417245199e-06, "loss": 0.4781, "step": 7319 }, { "epoch": 1.8526955201214883, "grad_norm": 0.17221699655056, "learning_rate": 8.429861746387022e-06, "loss": 0.4846, "step": 7320 }, { "epoch": 1.8529486206023793, "grad_norm": 0.17218883335590363, "learning_rate": 8.427497165541543e-06, "loss": 0.5038, "step": 7321 }, { "epoch": 1.85320172108327, "grad_norm": 0.1681022197008133, "learning_rate": 8.42513267484432e-06, "loss": 0.5217, "step": 7322 }, { "epoch": 1.8534548215641609, "grad_norm": 0.17113012075424194, "learning_rate": 8.422768274430893e-06, "loss": 0.4748, "step": 7323 }, { "epoch": 1.8537079220450519, "grad_norm": 0.16664718091487885, "learning_rate": 8.420403964436818e-06, "loss": 0.459, "step": 7324 }, { "epoch": 1.8539610225259429, "grad_norm": 0.1649954915046692, "learning_rate": 8.418039744997632e-06, "loss": 0.4847, "step": 7325 }, { "epoch": 1.8542141230068339, "grad_norm": 0.1674114167690277, "learning_rate": 8.415675616248871e-06, "loss": 0.4889, "step": 7326 }, { "epoch": 1.8544672234877246, "grad_norm": 0.16845543682575226, "learning_rate": 8.41331157832607e-06, "loss": 0.4738, "step": 7327 }, { "epoch": 1.8547203239686154, "grad_norm": 0.16865846514701843, "learning_rate": 8.410947631364748e-06, "loss": 0.4824, "step": 7328 }, { "epoch": 1.8549734244495064, "grad_norm": 0.16583672165870667, "learning_rate": 8.408583775500425e-06, "loss": 0.4994, "step": 7329 }, { "epoch": 1.8552265249303974, "grad_norm": 0.17079652845859528, "learning_rate": 8.406220010868621e-06, "loss": 0.4783, "step": 7330 }, { "epoch": 1.8554796254112884, "grad_norm": 0.18369020521640778, "learning_rate": 8.403856337604843e-06, "loss": 0.4823, "step": 7331 }, { "epoch": 1.8557327258921792, "grad_norm": 0.17434315383434296, "learning_rate": 8.401492755844594e-06, "loss": 0.5188, "step": 7332 }, { "epoch": 1.85598582637307, "grad_norm": 0.1666639745235443, "learning_rate": 8.399129265723378e-06, "loss": 0.4709, "step": 7333 }, { "epoch": 1.856238926853961, "grad_norm": 0.17311371862888336, "learning_rate": 8.396765867376683e-06, "loss": 0.4888, "step": 7334 }, { "epoch": 1.856492027334852, "grad_norm": 0.17429691553115845, "learning_rate": 8.394402560940001e-06, "loss": 0.4876, "step": 7335 }, { "epoch": 1.856745127815743, "grad_norm": 0.1747545450925827, "learning_rate": 8.392039346548811e-06, "loss": 0.4817, "step": 7336 }, { "epoch": 1.8569982282966337, "grad_norm": 0.1656615138053894, "learning_rate": 8.389676224338595e-06, "loss": 0.4964, "step": 7337 }, { "epoch": 1.8572513287775245, "grad_norm": 0.17241881787776947, "learning_rate": 8.38731319444483e-06, "loss": 0.4844, "step": 7338 }, { "epoch": 1.8575044292584155, "grad_norm": 0.17984658479690552, "learning_rate": 8.384950257002974e-06, "loss": 0.4854, "step": 7339 }, { "epoch": 1.8577575297393065, "grad_norm": 0.16832594573497772, "learning_rate": 8.382587412148495e-06, "loss": 0.4684, "step": 7340 }, { "epoch": 1.8580106302201975, "grad_norm": 0.17127679288387299, "learning_rate": 8.380224660016848e-06, "loss": 0.459, "step": 7341 }, { "epoch": 1.8582637307010883, "grad_norm": 0.17019924521446228, "learning_rate": 8.377862000743486e-06, "loss": 0.492, "step": 7342 }, { "epoch": 1.8585168311819793, "grad_norm": 0.16403087973594666, "learning_rate": 8.375499434463855e-06, "loss": 0.5063, "step": 7343 }, { "epoch": 1.85876993166287, "grad_norm": 0.1730656623840332, "learning_rate": 8.373136961313399e-06, "loss": 0.4479, "step": 7344 }, { "epoch": 1.859023032143761, "grad_norm": 0.17351488769054413, "learning_rate": 8.370774581427544e-06, "loss": 0.4685, "step": 7345 }, { "epoch": 1.859276132624652, "grad_norm": 0.1735028326511383, "learning_rate": 8.36841229494173e-06, "loss": 0.4848, "step": 7346 }, { "epoch": 1.8595292331055429, "grad_norm": 0.17205369472503662, "learning_rate": 8.366050101991375e-06, "loss": 0.5032, "step": 7347 }, { "epoch": 1.8597823335864339, "grad_norm": 0.1644473522901535, "learning_rate": 8.363688002711904e-06, "loss": 0.5118, "step": 7348 }, { "epoch": 1.8600354340673246, "grad_norm": 0.17186890542507172, "learning_rate": 8.36132599723873e-06, "loss": 0.4927, "step": 7349 }, { "epoch": 1.8602885345482156, "grad_norm": 0.16442647576332092, "learning_rate": 8.35896408570726e-06, "loss": 0.4935, "step": 7350 }, { "epoch": 1.8605416350291066, "grad_norm": 0.1664203554391861, "learning_rate": 8.356602268252894e-06, "loss": 0.4736, "step": 7351 }, { "epoch": 1.8607947355099976, "grad_norm": 0.1678372174501419, "learning_rate": 8.354240545011037e-06, "loss": 0.4542, "step": 7352 }, { "epoch": 1.8610478359908884, "grad_norm": 0.17300380766391754, "learning_rate": 8.351878916117077e-06, "loss": 0.465, "step": 7353 }, { "epoch": 1.8613009364717792, "grad_norm": 0.16729050874710083, "learning_rate": 8.349517381706404e-06, "loss": 0.4786, "step": 7354 }, { "epoch": 1.8615540369526702, "grad_norm": 0.16783970594406128, "learning_rate": 8.3471559419144e-06, "loss": 0.4673, "step": 7355 }, { "epoch": 1.8618071374335612, "grad_norm": 0.1690918505191803, "learning_rate": 8.344794596876437e-06, "loss": 0.4775, "step": 7356 }, { "epoch": 1.8620602379144522, "grad_norm": 0.17120561003684998, "learning_rate": 8.34243334672789e-06, "loss": 0.4806, "step": 7357 }, { "epoch": 1.862313338395343, "grad_norm": 0.18912899494171143, "learning_rate": 8.340072191604121e-06, "loss": 0.4987, "step": 7358 }, { "epoch": 1.8625664388762337, "grad_norm": 0.17735163867473602, "learning_rate": 8.337711131640494e-06, "loss": 0.4878, "step": 7359 }, { "epoch": 1.8628195393571247, "grad_norm": 0.16653811931610107, "learning_rate": 8.335350166972364e-06, "loss": 0.4908, "step": 7360 }, { "epoch": 1.8630726398380157, "grad_norm": 0.1725742071866989, "learning_rate": 8.332989297735074e-06, "loss": 0.4744, "step": 7361 }, { "epoch": 1.8633257403189067, "grad_norm": 0.16985781490802765, "learning_rate": 8.330628524063975e-06, "loss": 0.4807, "step": 7362 }, { "epoch": 1.8635788407997975, "grad_norm": 0.1741483509540558, "learning_rate": 8.328267846094398e-06, "loss": 0.4609, "step": 7363 }, { "epoch": 1.8638319412806883, "grad_norm": 1.4781297445297241, "learning_rate": 8.325907263961679e-06, "loss": 0.5662, "step": 7364 }, { "epoch": 1.8640850417615793, "grad_norm": 0.16808034479618073, "learning_rate": 8.323546777801146e-06, "loss": 0.4651, "step": 7365 }, { "epoch": 1.8643381422424703, "grad_norm": 0.1706436425447464, "learning_rate": 8.321186387748127e-06, "loss": 0.4868, "step": 7366 }, { "epoch": 1.8645912427233613, "grad_norm": 0.1626710444688797, "learning_rate": 8.318826093937923e-06, "loss": 0.4793, "step": 7367 }, { "epoch": 1.864844343204252, "grad_norm": 0.16747617721557617, "learning_rate": 8.316465896505855e-06, "loss": 0.4861, "step": 7368 }, { "epoch": 1.8650974436851429, "grad_norm": 0.16405919194221497, "learning_rate": 8.314105795587225e-06, "loss": 0.4726, "step": 7369 }, { "epoch": 1.8653505441660339, "grad_norm": 0.17476406693458557, "learning_rate": 8.311745791317334e-06, "loss": 0.4876, "step": 7370 }, { "epoch": 1.8656036446469249, "grad_norm": 0.16558048129081726, "learning_rate": 8.309385883831475e-06, "loss": 0.4684, "step": 7371 }, { "epoch": 1.8658567451278159, "grad_norm": 0.16529086232185364, "learning_rate": 8.307026073264943e-06, "loss": 0.494, "step": 7372 }, { "epoch": 1.8661098456087066, "grad_norm": 0.16214488446712494, "learning_rate": 8.304666359753008e-06, "loss": 0.4515, "step": 7373 }, { "epoch": 1.8663629460895974, "grad_norm": 0.1669948250055313, "learning_rate": 8.302306743430957e-06, "loss": 0.4754, "step": 7374 }, { "epoch": 1.8666160465704884, "grad_norm": 0.17556220293045044, "learning_rate": 8.299947224434058e-06, "loss": 0.4745, "step": 7375 }, { "epoch": 1.8668691470513794, "grad_norm": 0.211268812417984, "learning_rate": 8.29758780289758e-06, "loss": 0.4735, "step": 7376 }, { "epoch": 1.8671222475322704, "grad_norm": 0.17003364861011505, "learning_rate": 8.295228478956782e-06, "loss": 0.4891, "step": 7377 }, { "epoch": 1.8673753480131612, "grad_norm": 0.1720387041568756, "learning_rate": 8.292869252746918e-06, "loss": 0.4997, "step": 7378 }, { "epoch": 1.8676284484940522, "grad_norm": 0.17400363087654114, "learning_rate": 8.29051012440324e-06, "loss": 0.4562, "step": 7379 }, { "epoch": 1.867881548974943, "grad_norm": 0.1788504272699356, "learning_rate": 8.288151094060987e-06, "loss": 0.5167, "step": 7380 }, { "epoch": 1.868134649455834, "grad_norm": 0.17369627952575684, "learning_rate": 8.285792161855404e-06, "loss": 0.4942, "step": 7381 }, { "epoch": 1.868387749936725, "grad_norm": 0.16728538274765015, "learning_rate": 8.283433327921717e-06, "loss": 0.4627, "step": 7382 }, { "epoch": 1.8686408504176157, "grad_norm": 0.17070519924163818, "learning_rate": 8.281074592395162e-06, "loss": 0.4903, "step": 7383 }, { "epoch": 1.8688939508985067, "grad_norm": 0.1723376363515854, "learning_rate": 8.278715955410949e-06, "loss": 0.4958, "step": 7384 }, { "epoch": 1.8691470513793975, "grad_norm": 0.16786590218544006, "learning_rate": 8.2763574171043e-06, "loss": 0.4942, "step": 7385 }, { "epoch": 1.8694001518602885, "grad_norm": 0.16990673542022705, "learning_rate": 8.273998977610422e-06, "loss": 0.4749, "step": 7386 }, { "epoch": 1.8696532523411795, "grad_norm": 0.17085836827754974, "learning_rate": 8.271640637064525e-06, "loss": 0.4855, "step": 7387 }, { "epoch": 1.8699063528220705, "grad_norm": 0.16655170917510986, "learning_rate": 8.269282395601805e-06, "loss": 0.4684, "step": 7388 }, { "epoch": 1.8701594533029613, "grad_norm": 0.174225315451622, "learning_rate": 8.26692425335745e-06, "loss": 0.5024, "step": 7389 }, { "epoch": 1.870412553783852, "grad_norm": 0.1662006974220276, "learning_rate": 8.264566210466653e-06, "loss": 0.4766, "step": 7390 }, { "epoch": 1.870665654264743, "grad_norm": 0.17303413152694702, "learning_rate": 8.262208267064593e-06, "loss": 0.4624, "step": 7391 }, { "epoch": 1.870918754745634, "grad_norm": 0.18937097489833832, "learning_rate": 8.259850423286448e-06, "loss": 0.4939, "step": 7392 }, { "epoch": 1.871171855226525, "grad_norm": 0.16780027747154236, "learning_rate": 8.257492679267384e-06, "loss": 0.482, "step": 7393 }, { "epoch": 1.8714249557074158, "grad_norm": 0.17505444586277008, "learning_rate": 8.255135035142576e-06, "loss": 0.4695, "step": 7394 }, { "epoch": 1.8716780561883066, "grad_norm": 0.16769373416900635, "learning_rate": 8.252777491047168e-06, "loss": 0.4553, "step": 7395 }, { "epoch": 1.8719311566691976, "grad_norm": 0.1783466935157776, "learning_rate": 8.250420047116322e-06, "loss": 0.4797, "step": 7396 }, { "epoch": 1.8721842571500886, "grad_norm": 0.17111344635486603, "learning_rate": 8.248062703485184e-06, "loss": 0.4875, "step": 7397 }, { "epoch": 1.8724373576309796, "grad_norm": 0.16805164515972137, "learning_rate": 8.245705460288895e-06, "loss": 0.475, "step": 7398 }, { "epoch": 1.8726904581118704, "grad_norm": 0.1913883090019226, "learning_rate": 8.243348317662593e-06, "loss": 0.4646, "step": 7399 }, { "epoch": 1.8729435585927612, "grad_norm": 0.17365223169326782, "learning_rate": 8.240991275741401e-06, "loss": 0.4992, "step": 7400 }, { "epoch": 1.8731966590736522, "grad_norm": 0.1800697296857834, "learning_rate": 8.23863433466045e-06, "loss": 0.5151, "step": 7401 }, { "epoch": 1.8734497595545432, "grad_norm": 0.1640055626630783, "learning_rate": 8.236277494554853e-06, "loss": 0.4537, "step": 7402 }, { "epoch": 1.8737028600354342, "grad_norm": 0.16595253348350525, "learning_rate": 8.23392075555973e-06, "loss": 0.4857, "step": 7403 }, { "epoch": 1.873955960516325, "grad_norm": 0.17073789238929749, "learning_rate": 8.231564117810183e-06, "loss": 0.4961, "step": 7404 }, { "epoch": 1.8742090609972157, "grad_norm": 0.17276127636432648, "learning_rate": 8.229207581441316e-06, "loss": 0.4562, "step": 7405 }, { "epoch": 1.8744621614781067, "grad_norm": 0.16723424196243286, "learning_rate": 8.22685114658822e-06, "loss": 0.4867, "step": 7406 }, { "epoch": 1.8747152619589977, "grad_norm": 0.16834837198257446, "learning_rate": 8.224494813385986e-06, "loss": 0.4607, "step": 7407 }, { "epoch": 1.8749683624398887, "grad_norm": 0.1688464730978012, "learning_rate": 8.222138581969698e-06, "loss": 0.5115, "step": 7408 }, { "epoch": 1.8752214629207795, "grad_norm": 0.1767568439245224, "learning_rate": 8.219782452474432e-06, "loss": 0.481, "step": 7409 }, { "epoch": 1.8754745634016705, "grad_norm": 0.17189620435237885, "learning_rate": 8.217426425035268e-06, "loss": 0.4787, "step": 7410 }, { "epoch": 1.8757276638825613, "grad_norm": 0.16932186484336853, "learning_rate": 8.21507049978726e-06, "loss": 0.4693, "step": 7411 }, { "epoch": 1.8759807643634523, "grad_norm": 0.23553138971328735, "learning_rate": 8.212714676865475e-06, "loss": 0.4906, "step": 7412 }, { "epoch": 1.8762338648443433, "grad_norm": 0.16571515798568726, "learning_rate": 8.210358956404967e-06, "loss": 0.4941, "step": 7413 }, { "epoch": 1.876486965325234, "grad_norm": 0.17635861039161682, "learning_rate": 8.208003338540785e-06, "loss": 0.4944, "step": 7414 }, { "epoch": 1.876740065806125, "grad_norm": 0.1667156219482422, "learning_rate": 8.205647823407965e-06, "loss": 0.4742, "step": 7415 }, { "epoch": 1.8769931662870158, "grad_norm": 0.16873173415660858, "learning_rate": 8.203292411141557e-06, "loss": 0.4722, "step": 7416 }, { "epoch": 1.8772462667679068, "grad_norm": 0.16841930150985718, "learning_rate": 8.200937101876579e-06, "loss": 0.4733, "step": 7417 }, { "epoch": 1.8774993672487978, "grad_norm": 0.1674862951040268, "learning_rate": 8.198581895748061e-06, "loss": 0.4686, "step": 7418 }, { "epoch": 1.8777524677296888, "grad_norm": 0.17266146838665009, "learning_rate": 8.196226792891022e-06, "loss": 0.48, "step": 7419 }, { "epoch": 1.8780055682105796, "grad_norm": 0.16845154762268066, "learning_rate": 8.193871793440475e-06, "loss": 0.4616, "step": 7420 }, { "epoch": 1.8782586686914704, "grad_norm": 0.16572798788547516, "learning_rate": 8.19151689753143e-06, "loss": 0.4526, "step": 7421 }, { "epoch": 1.8785117691723614, "grad_norm": 0.19001463055610657, "learning_rate": 8.189162105298881e-06, "loss": 0.4945, "step": 7422 }, { "epoch": 1.8787648696532524, "grad_norm": 0.16886477172374725, "learning_rate": 8.186807416877828e-06, "loss": 0.5005, "step": 7423 }, { "epoch": 1.8790179701341434, "grad_norm": 0.16759172081947327, "learning_rate": 8.184452832403258e-06, "loss": 0.4761, "step": 7424 }, { "epoch": 1.8792710706150342, "grad_norm": 0.1706591099500656, "learning_rate": 8.182098352010158e-06, "loss": 0.5061, "step": 7425 }, { "epoch": 1.879524171095925, "grad_norm": 0.1736859232187271, "learning_rate": 8.179743975833502e-06, "loss": 0.5006, "step": 7426 }, { "epoch": 1.879777271576816, "grad_norm": 0.16453886032104492, "learning_rate": 8.177389704008265e-06, "loss": 0.4705, "step": 7427 }, { "epoch": 1.880030372057707, "grad_norm": 0.17052152752876282, "learning_rate": 8.175035536669409e-06, "loss": 0.4856, "step": 7428 }, { "epoch": 1.880283472538598, "grad_norm": 0.17098338901996613, "learning_rate": 8.172681473951894e-06, "loss": 0.5012, "step": 7429 }, { "epoch": 1.8805365730194887, "grad_norm": 0.1763639599084854, "learning_rate": 8.170327515990675e-06, "loss": 0.4839, "step": 7430 }, { "epoch": 1.8807896735003795, "grad_norm": 0.20828339457511902, "learning_rate": 8.167973662920693e-06, "loss": 0.4802, "step": 7431 }, { "epoch": 1.8810427739812705, "grad_norm": 0.17169591784477234, "learning_rate": 8.1656199148769e-06, "loss": 0.5345, "step": 7432 }, { "epoch": 1.8812958744621615, "grad_norm": 0.16908994317054749, "learning_rate": 8.163266271994228e-06, "loss": 0.4779, "step": 7433 }, { "epoch": 1.8815489749430525, "grad_norm": 0.16826267540454865, "learning_rate": 8.1609127344076e-06, "loss": 0.4431, "step": 7434 }, { "epoch": 1.8818020754239433, "grad_norm": 0.17266221344470978, "learning_rate": 8.158559302251947e-06, "loss": 0.4849, "step": 7435 }, { "epoch": 1.882055175904834, "grad_norm": 0.17184031009674072, "learning_rate": 8.156205975662182e-06, "loss": 0.4717, "step": 7436 }, { "epoch": 1.882308276385725, "grad_norm": 0.17031453549861908, "learning_rate": 8.153852754773214e-06, "loss": 0.5021, "step": 7437 }, { "epoch": 1.882561376866616, "grad_norm": 0.16920770704746246, "learning_rate": 8.151499639719958e-06, "loss": 0.4759, "step": 7438 }, { "epoch": 1.882814477347507, "grad_norm": 0.1694176346063614, "learning_rate": 8.149146630637304e-06, "loss": 0.4906, "step": 7439 }, { "epoch": 1.8830675778283978, "grad_norm": 0.165378138422966, "learning_rate": 8.146793727660142e-06, "loss": 0.4882, "step": 7440 }, { "epoch": 1.8833206783092888, "grad_norm": 0.17271688580513, "learning_rate": 8.14444093092337e-06, "loss": 0.4935, "step": 7441 }, { "epoch": 1.8835737787901796, "grad_norm": 0.17486535012722015, "learning_rate": 8.142088240561862e-06, "loss": 0.462, "step": 7442 }, { "epoch": 1.8838268792710706, "grad_norm": 0.16984353959560394, "learning_rate": 8.139735656710495e-06, "loss": 0.5126, "step": 7443 }, { "epoch": 1.8840799797519616, "grad_norm": 0.1666148602962494, "learning_rate": 8.137383179504137e-06, "loss": 0.4676, "step": 7444 }, { "epoch": 1.8843330802328524, "grad_norm": 0.17122270166873932, "learning_rate": 8.135030809077648e-06, "loss": 0.4748, "step": 7445 }, { "epoch": 1.8845861807137434, "grad_norm": 0.16459473967552185, "learning_rate": 8.132678545565887e-06, "loss": 0.458, "step": 7446 }, { "epoch": 1.8848392811946342, "grad_norm": 0.16632671654224396, "learning_rate": 8.130326389103704e-06, "loss": 0.4709, "step": 7447 }, { "epoch": 1.8850923816755252, "grad_norm": 0.16611498594284058, "learning_rate": 8.127974339825945e-06, "loss": 0.4811, "step": 7448 }, { "epoch": 1.8853454821564162, "grad_norm": 0.1644921451807022, "learning_rate": 8.125622397867446e-06, "loss": 0.4596, "step": 7449 }, { "epoch": 1.8855985826373072, "grad_norm": 0.1683819591999054, "learning_rate": 8.123270563363038e-06, "loss": 0.4591, "step": 7450 }, { "epoch": 1.885851683118198, "grad_norm": 0.1657133847475052, "learning_rate": 8.120918836447547e-06, "loss": 0.4753, "step": 7451 }, { "epoch": 1.8861047835990887, "grad_norm": 0.17507702112197876, "learning_rate": 8.118567217255794e-06, "loss": 0.5014, "step": 7452 }, { "epoch": 1.8863578840799797, "grad_norm": 0.1717086285352707, "learning_rate": 8.116215705922589e-06, "loss": 0.4912, "step": 7453 }, { "epoch": 1.8866109845608707, "grad_norm": 0.16834241151809692, "learning_rate": 8.113864302582744e-06, "loss": 0.4745, "step": 7454 }, { "epoch": 1.8868640850417617, "grad_norm": 0.17972497642040253, "learning_rate": 8.11151300737106e-06, "loss": 0.4807, "step": 7455 }, { "epoch": 1.8871171855226525, "grad_norm": 0.16771052777767181, "learning_rate": 8.109161820422327e-06, "loss": 0.4639, "step": 7456 }, { "epoch": 1.8873702860035433, "grad_norm": 0.16740871965885162, "learning_rate": 8.106810741871334e-06, "loss": 0.4495, "step": 7457 }, { "epoch": 1.8876233864844343, "grad_norm": 0.16814324259757996, "learning_rate": 8.104459771852867e-06, "loss": 0.4899, "step": 7458 }, { "epoch": 1.8878764869653253, "grad_norm": 0.17301733791828156, "learning_rate": 8.102108910501699e-06, "loss": 0.4744, "step": 7459 }, { "epoch": 1.8881295874462163, "grad_norm": 0.17092899978160858, "learning_rate": 8.099758157952607e-06, "loss": 0.5061, "step": 7460 }, { "epoch": 1.888382687927107, "grad_norm": 0.1707819253206253, "learning_rate": 8.097407514340343e-06, "loss": 0.4683, "step": 7461 }, { "epoch": 1.8886357884079978, "grad_norm": 0.17907924950122833, "learning_rate": 8.095056979799669e-06, "loss": 0.4863, "step": 7462 }, { "epoch": 1.8888888888888888, "grad_norm": 0.1786811202764511, "learning_rate": 8.09270655446534e-06, "loss": 0.4881, "step": 7463 }, { "epoch": 1.8891419893697798, "grad_norm": 0.16476896405220032, "learning_rate": 8.090356238472098e-06, "loss": 0.4644, "step": 7464 }, { "epoch": 1.8893950898506708, "grad_norm": 0.1666848063468933, "learning_rate": 8.088006031954683e-06, "loss": 0.4838, "step": 7465 }, { "epoch": 1.8896481903315616, "grad_norm": 0.1696503907442093, "learning_rate": 8.085655935047829e-06, "loss": 0.4601, "step": 7466 }, { "epoch": 1.8899012908124524, "grad_norm": 0.1681821197271347, "learning_rate": 8.083305947886256e-06, "loss": 0.4839, "step": 7467 }, { "epoch": 1.8901543912933434, "grad_norm": 0.1712205857038498, "learning_rate": 8.080956070604686e-06, "loss": 0.4706, "step": 7468 }, { "epoch": 1.8904074917742344, "grad_norm": 0.1754671186208725, "learning_rate": 8.078606303337835e-06, "loss": 0.5106, "step": 7469 }, { "epoch": 1.8906605922551254, "grad_norm": 0.18262942135334015, "learning_rate": 8.07625664622041e-06, "loss": 0.4854, "step": 7470 }, { "epoch": 1.8909136927360162, "grad_norm": 0.1766863912343979, "learning_rate": 8.073907099387112e-06, "loss": 0.4914, "step": 7471 }, { "epoch": 1.891166793216907, "grad_norm": 0.16962265968322754, "learning_rate": 8.071557662972632e-06, "loss": 0.4853, "step": 7472 }, { "epoch": 1.891419893697798, "grad_norm": 0.16988873481750488, "learning_rate": 8.06920833711166e-06, "loss": 0.4616, "step": 7473 }, { "epoch": 1.891672994178689, "grad_norm": 0.16417275369167328, "learning_rate": 8.066859121938878e-06, "loss": 0.4677, "step": 7474 }, { "epoch": 1.89192609465958, "grad_norm": 0.16536366939544678, "learning_rate": 8.064510017588962e-06, "loss": 0.4768, "step": 7475 }, { "epoch": 1.8921791951404707, "grad_norm": 0.17168404161930084, "learning_rate": 8.062161024196583e-06, "loss": 0.4704, "step": 7476 }, { "epoch": 1.8924322956213617, "grad_norm": 0.16821901500225067, "learning_rate": 8.059812141896402e-06, "loss": 0.4818, "step": 7477 }, { "epoch": 1.8926853961022525, "grad_norm": 0.16313108801841736, "learning_rate": 8.057463370823075e-06, "loss": 0.4831, "step": 7478 }, { "epoch": 1.8929384965831435, "grad_norm": 0.16990292072296143, "learning_rate": 8.055114711111251e-06, "loss": 0.4817, "step": 7479 }, { "epoch": 1.8931915970640345, "grad_norm": 0.1662517786026001, "learning_rate": 8.052766162895575e-06, "loss": 0.4643, "step": 7480 }, { "epoch": 1.8934446975449253, "grad_norm": 0.17118866741657257, "learning_rate": 8.050417726310684e-06, "loss": 0.4617, "step": 7481 }, { "epoch": 1.8936977980258163, "grad_norm": 0.1688842922449112, "learning_rate": 8.048069401491213e-06, "loss": 0.4683, "step": 7482 }, { "epoch": 1.893950898506707, "grad_norm": 0.17204439640045166, "learning_rate": 8.045721188571781e-06, "loss": 0.4608, "step": 7483 }, { "epoch": 1.894203998987598, "grad_norm": 0.16906671226024628, "learning_rate": 8.043373087687005e-06, "loss": 0.494, "step": 7484 }, { "epoch": 1.894457099468489, "grad_norm": 0.1778915375471115, "learning_rate": 8.0410250989715e-06, "loss": 0.4696, "step": 7485 }, { "epoch": 1.89471019994938, "grad_norm": 0.16858923435211182, "learning_rate": 8.038677222559872e-06, "loss": 0.4726, "step": 7486 }, { "epoch": 1.8949633004302708, "grad_norm": 0.17160660028457642, "learning_rate": 8.036329458586718e-06, "loss": 0.5249, "step": 7487 }, { "epoch": 1.8952164009111616, "grad_norm": 0.22659559547901154, "learning_rate": 8.033981807186633e-06, "loss": 0.471, "step": 7488 }, { "epoch": 1.8954695013920526, "grad_norm": 0.16896478831768036, "learning_rate": 8.031634268494197e-06, "loss": 0.507, "step": 7489 }, { "epoch": 1.8957226018729436, "grad_norm": 0.17792052030563354, "learning_rate": 8.029286842643993e-06, "loss": 0.4748, "step": 7490 }, { "epoch": 1.8959757023538346, "grad_norm": 0.17427538335323334, "learning_rate": 8.026939529770595e-06, "loss": 0.4629, "step": 7491 }, { "epoch": 1.8962288028347254, "grad_norm": 0.16935940086841583, "learning_rate": 8.024592330008569e-06, "loss": 0.4834, "step": 7492 }, { "epoch": 1.8964819033156162, "grad_norm": 0.1668558567762375, "learning_rate": 8.022245243492476e-06, "loss": 0.4681, "step": 7493 }, { "epoch": 1.8967350037965072, "grad_norm": 0.16823482513427734, "learning_rate": 8.019898270356866e-06, "loss": 0.488, "step": 7494 }, { "epoch": 1.8969881042773982, "grad_norm": 0.17248710989952087, "learning_rate": 8.017551410736288e-06, "loss": 0.474, "step": 7495 }, { "epoch": 1.8972412047582892, "grad_norm": 0.1775652915239334, "learning_rate": 8.01520466476528e-06, "loss": 0.4763, "step": 7496 }, { "epoch": 1.89749430523918, "grad_norm": 0.17304745316505432, "learning_rate": 8.01285803257838e-06, "loss": 0.4897, "step": 7497 }, { "epoch": 1.8977474057200707, "grad_norm": 0.1608479768037796, "learning_rate": 8.010511514310116e-06, "loss": 0.469, "step": 7498 }, { "epoch": 1.8980005062009617, "grad_norm": 0.16448229551315308, "learning_rate": 8.008165110095005e-06, "loss": 0.4638, "step": 7499 }, { "epoch": 1.8982536066818527, "grad_norm": 0.16758199036121368, "learning_rate": 8.005818820067563e-06, "loss": 0.4701, "step": 7500 }, { "epoch": 1.8985067071627437, "grad_norm": 0.1705644130706787, "learning_rate": 8.003472644362299e-06, "loss": 0.4675, "step": 7501 }, { "epoch": 1.8987598076436345, "grad_norm": 0.16649630665779114, "learning_rate": 8.001126583113712e-06, "loss": 0.491, "step": 7502 }, { "epoch": 1.8990129081245253, "grad_norm": 0.1777772307395935, "learning_rate": 7.998780636456297e-06, "loss": 0.4797, "step": 7503 }, { "epoch": 1.8992660086054163, "grad_norm": 0.17154017090797424, "learning_rate": 7.996434804524543e-06, "loss": 0.5082, "step": 7504 }, { "epoch": 1.8995191090863073, "grad_norm": 0.17361652851104736, "learning_rate": 7.994089087452937e-06, "loss": 0.472, "step": 7505 }, { "epoch": 1.8997722095671983, "grad_norm": 0.16966256499290466, "learning_rate": 7.991743485375943e-06, "loss": 0.4956, "step": 7506 }, { "epoch": 1.900025310048089, "grad_norm": 0.18031036853790283, "learning_rate": 7.989397998428035e-06, "loss": 0.4862, "step": 7507 }, { "epoch": 1.90027841052898, "grad_norm": 0.16264291107654572, "learning_rate": 7.987052626743676e-06, "loss": 0.4742, "step": 7508 }, { "epoch": 1.9005315110098708, "grad_norm": 0.16964517533779144, "learning_rate": 7.984707370457321e-06, "loss": 0.476, "step": 7509 }, { "epoch": 1.9007846114907618, "grad_norm": 0.17460325360298157, "learning_rate": 7.982362229703417e-06, "loss": 0.465, "step": 7510 }, { "epoch": 1.9010377119716528, "grad_norm": 0.17012004554271698, "learning_rate": 7.980017204616407e-06, "loss": 0.478, "step": 7511 }, { "epoch": 1.9012908124525436, "grad_norm": 0.16563810408115387, "learning_rate": 7.977672295330722e-06, "loss": 0.4737, "step": 7512 }, { "epoch": 1.9015439129334346, "grad_norm": 0.19024330377578735, "learning_rate": 7.975327501980798e-06, "loss": 0.4925, "step": 7513 }, { "epoch": 1.9017970134143254, "grad_norm": 0.17067058384418488, "learning_rate": 7.972982824701053e-06, "loss": 0.4863, "step": 7514 }, { "epoch": 1.9020501138952164, "grad_norm": 0.16982075572013855, "learning_rate": 7.970638263625903e-06, "loss": 0.47, "step": 7515 }, { "epoch": 1.9023032143761074, "grad_norm": 0.16970059275627136, "learning_rate": 7.968293818889758e-06, "loss": 0.515, "step": 7516 }, { "epoch": 1.9025563148569984, "grad_norm": 0.17434737086296082, "learning_rate": 7.965949490627016e-06, "loss": 0.489, "step": 7517 }, { "epoch": 1.9028094153378892, "grad_norm": 0.16564679145812988, "learning_rate": 7.963605278972076e-06, "loss": 0.5205, "step": 7518 }, { "epoch": 1.90306251581878, "grad_norm": 0.17137768864631653, "learning_rate": 7.961261184059324e-06, "loss": 0.4689, "step": 7519 }, { "epoch": 1.903315616299671, "grad_norm": 0.16384927928447723, "learning_rate": 7.958917206023145e-06, "loss": 0.4564, "step": 7520 }, { "epoch": 1.903568716780562, "grad_norm": 0.17546696960926056, "learning_rate": 7.956573344997916e-06, "loss": 0.5096, "step": 7521 }, { "epoch": 1.903821817261453, "grad_norm": 0.1729518324136734, "learning_rate": 7.954229601117999e-06, "loss": 0.4868, "step": 7522 }, { "epoch": 1.9040749177423437, "grad_norm": 0.17194864153862, "learning_rate": 7.951885974517758e-06, "loss": 0.5043, "step": 7523 }, { "epoch": 1.9043280182232345, "grad_norm": 0.17106623947620392, "learning_rate": 7.949542465331552e-06, "loss": 0.4785, "step": 7524 }, { "epoch": 1.9045811187041255, "grad_norm": 0.1716916412115097, "learning_rate": 7.947199073693726e-06, "loss": 0.476, "step": 7525 }, { "epoch": 1.9048342191850165, "grad_norm": 0.1751440465450287, "learning_rate": 7.944855799738621e-06, "loss": 0.4968, "step": 7526 }, { "epoch": 1.9050873196659075, "grad_norm": 0.1735166758298874, "learning_rate": 7.942512643600582e-06, "loss": 0.4991, "step": 7527 }, { "epoch": 1.9053404201467983, "grad_norm": 0.17454925179481506, "learning_rate": 7.940169605413918e-06, "loss": 0.4921, "step": 7528 }, { "epoch": 1.905593520627689, "grad_norm": 0.1722957044839859, "learning_rate": 7.937826685312967e-06, "loss": 0.4618, "step": 7529 }, { "epoch": 1.90584662110858, "grad_norm": 0.17657631635665894, "learning_rate": 7.935483883432036e-06, "loss": 0.4823, "step": 7530 }, { "epoch": 1.906099721589471, "grad_norm": 0.1683785617351532, "learning_rate": 7.933141199905435e-06, "loss": 0.4543, "step": 7531 }, { "epoch": 1.906352822070362, "grad_norm": 0.17016401886940002, "learning_rate": 7.930798634867467e-06, "loss": 0.4771, "step": 7532 }, { "epoch": 1.9066059225512528, "grad_norm": 0.16907022893428802, "learning_rate": 7.92845618845242e-06, "loss": 0.4674, "step": 7533 }, { "epoch": 1.9068590230321436, "grad_norm": 0.174383744597435, "learning_rate": 7.926113860794586e-06, "loss": 0.4914, "step": 7534 }, { "epoch": 1.9071121235130346, "grad_norm": 0.16981613636016846, "learning_rate": 7.923771652028246e-06, "loss": 0.4854, "step": 7535 }, { "epoch": 1.9073652239939256, "grad_norm": 0.17820723354816437, "learning_rate": 7.921429562287673e-06, "loss": 0.4656, "step": 7536 }, { "epoch": 1.9076183244748166, "grad_norm": 0.1690894365310669, "learning_rate": 7.919087591707135e-06, "loss": 0.4788, "step": 7537 }, { "epoch": 1.9078714249557074, "grad_norm": 0.6085459589958191, "learning_rate": 7.916745740420891e-06, "loss": 0.5017, "step": 7538 }, { "epoch": 1.9081245254365984, "grad_norm": 0.16968956589698792, "learning_rate": 7.914404008563194e-06, "loss": 0.4932, "step": 7539 }, { "epoch": 1.9083776259174892, "grad_norm": 0.173153355717659, "learning_rate": 7.912062396268292e-06, "loss": 0.4966, "step": 7540 }, { "epoch": 1.9086307263983802, "grad_norm": 0.1709088385105133, "learning_rate": 7.90972090367042e-06, "loss": 0.4864, "step": 7541 }, { "epoch": 1.9088838268792712, "grad_norm": 0.1731918603181839, "learning_rate": 7.907379530903818e-06, "loss": 0.479, "step": 7542 }, { "epoch": 1.909136927360162, "grad_norm": 0.16382324695587158, "learning_rate": 7.90503827810271e-06, "loss": 0.4638, "step": 7543 }, { "epoch": 1.909390027841053, "grad_norm": 0.16988088190555573, "learning_rate": 7.902697145401313e-06, "loss": 0.4822, "step": 7544 }, { "epoch": 1.9096431283219437, "grad_norm": 0.1674882471561432, "learning_rate": 7.900356132933838e-06, "loss": 0.5052, "step": 7545 }, { "epoch": 1.9098962288028347, "grad_norm": 0.16964825987815857, "learning_rate": 7.898015240834492e-06, "loss": 0.4889, "step": 7546 }, { "epoch": 1.9101493292837257, "grad_norm": 0.1723451465368271, "learning_rate": 7.895674469237473e-06, "loss": 0.4817, "step": 7547 }, { "epoch": 1.9104024297646167, "grad_norm": 0.17654018104076385, "learning_rate": 7.893333818276973e-06, "loss": 0.468, "step": 7548 }, { "epoch": 1.9106555302455075, "grad_norm": 0.16950403153896332, "learning_rate": 7.890993288087181e-06, "loss": 0.4714, "step": 7549 }, { "epoch": 1.9109086307263983, "grad_norm": 0.1648697704076767, "learning_rate": 7.888652878802265e-06, "loss": 0.48, "step": 7550 }, { "epoch": 1.9111617312072893, "grad_norm": 0.16696491837501526, "learning_rate": 7.886312590556403e-06, "loss": 0.4994, "step": 7551 }, { "epoch": 1.9114148316881803, "grad_norm": 0.17109908163547516, "learning_rate": 7.883972423483754e-06, "loss": 0.4725, "step": 7552 }, { "epoch": 1.9116679321690713, "grad_norm": 0.16525208950042725, "learning_rate": 7.88163237771848e-06, "loss": 0.4795, "step": 7553 }, { "epoch": 1.911921032649962, "grad_norm": 0.16652193665504456, "learning_rate": 7.879292453394728e-06, "loss": 0.468, "step": 7554 }, { "epoch": 1.9121741331308528, "grad_norm": 0.1686284840106964, "learning_rate": 7.87695265064664e-06, "loss": 0.4545, "step": 7555 }, { "epoch": 1.9124272336117438, "grad_norm": 0.1667724996805191, "learning_rate": 7.874612969608353e-06, "loss": 0.4597, "step": 7556 }, { "epoch": 1.9126803340926348, "grad_norm": 0.183907613158226, "learning_rate": 7.872273410413996e-06, "loss": 0.4868, "step": 7557 }, { "epoch": 1.9129334345735258, "grad_norm": 0.16917796432971954, "learning_rate": 7.869933973197689e-06, "loss": 0.4927, "step": 7558 }, { "epoch": 1.9131865350544166, "grad_norm": 0.17343157529830933, "learning_rate": 7.867594658093552e-06, "loss": 0.4956, "step": 7559 }, { "epoch": 1.9134396355353074, "grad_norm": 0.16949833929538727, "learning_rate": 7.865255465235692e-06, "loss": 0.4863, "step": 7560 }, { "epoch": 1.9136927360161984, "grad_norm": 0.16937707364559174, "learning_rate": 7.862916394758205e-06, "loss": 0.4978, "step": 7561 }, { "epoch": 1.9139458364970894, "grad_norm": 0.17473581433296204, "learning_rate": 7.860577446795188e-06, "loss": 0.4796, "step": 7562 }, { "epoch": 1.9141989369779804, "grad_norm": 0.16813047230243683, "learning_rate": 7.858238621480726e-06, "loss": 0.4983, "step": 7563 }, { "epoch": 1.9144520374588712, "grad_norm": 0.16782943904399872, "learning_rate": 7.855899918948902e-06, "loss": 0.4713, "step": 7564 }, { "epoch": 1.914705137939762, "grad_norm": 0.17564095556735992, "learning_rate": 7.853561339333788e-06, "loss": 0.4927, "step": 7565 }, { "epoch": 1.914958238420653, "grad_norm": 0.17316092550754547, "learning_rate": 7.85122288276945e-06, "loss": 0.481, "step": 7566 }, { "epoch": 1.915211338901544, "grad_norm": 0.17347900569438934, "learning_rate": 7.848884549389945e-06, "loss": 0.4909, "step": 7567 }, { "epoch": 1.915464439382435, "grad_norm": 0.17437297105789185, "learning_rate": 7.846546339329326e-06, "loss": 0.4987, "step": 7568 }, { "epoch": 1.9157175398633257, "grad_norm": 0.17461377382278442, "learning_rate": 7.844208252721637e-06, "loss": 0.4959, "step": 7569 }, { "epoch": 1.9159706403442165, "grad_norm": 0.1718897968530655, "learning_rate": 7.841870289700914e-06, "loss": 0.4792, "step": 7570 }, { "epoch": 1.9162237408251075, "grad_norm": 0.17480725049972534, "learning_rate": 7.839532450401195e-06, "loss": 0.4781, "step": 7571 }, { "epoch": 1.9164768413059985, "grad_norm": 0.16720210015773773, "learning_rate": 7.837194734956492e-06, "loss": 0.461, "step": 7572 }, { "epoch": 1.9167299417868895, "grad_norm": 0.1716834306716919, "learning_rate": 7.83485714350083e-06, "loss": 0.493, "step": 7573 }, { "epoch": 1.9169830422677803, "grad_norm": 0.1719888150691986, "learning_rate": 7.832519676168215e-06, "loss": 0.4755, "step": 7574 }, { "epoch": 1.9172361427486713, "grad_norm": 0.1722981482744217, "learning_rate": 7.830182333092648e-06, "loss": 0.468, "step": 7575 }, { "epoch": 1.917489243229562, "grad_norm": 0.17401981353759766, "learning_rate": 7.827845114408123e-06, "loss": 0.4981, "step": 7576 }, { "epoch": 1.917742343710453, "grad_norm": 0.1700400859117508, "learning_rate": 7.825508020248638e-06, "loss": 0.4882, "step": 7577 }, { "epoch": 1.917995444191344, "grad_norm": 0.16838926076889038, "learning_rate": 7.823171050748157e-06, "loss": 0.4735, "step": 7578 }, { "epoch": 1.9182485446722348, "grad_norm": 0.1714935302734375, "learning_rate": 7.820834206040666e-06, "loss": 0.464, "step": 7579 }, { "epoch": 1.9185016451531258, "grad_norm": 0.18877147138118744, "learning_rate": 7.818497486260126e-06, "loss": 0.4669, "step": 7580 }, { "epoch": 1.9187547456340166, "grad_norm": 0.17123602330684662, "learning_rate": 7.816160891540499e-06, "loss": 0.4477, "step": 7581 }, { "epoch": 1.9190078461149076, "grad_norm": 0.17090952396392822, "learning_rate": 7.813824422015737e-06, "loss": 0.5127, "step": 7582 }, { "epoch": 1.9192609465957986, "grad_norm": 0.16903527081012726, "learning_rate": 7.811488077819782e-06, "loss": 0.4728, "step": 7583 }, { "epoch": 1.9195140470766896, "grad_norm": 0.17241840064525604, "learning_rate": 7.80915185908657e-06, "loss": 0.5182, "step": 7584 }, { "epoch": 1.9197671475575804, "grad_norm": 0.1700318306684494, "learning_rate": 7.806815765950036e-06, "loss": 0.4722, "step": 7585 }, { "epoch": 1.9200202480384712, "grad_norm": 0.21631529927253723, "learning_rate": 7.804479798544103e-06, "loss": 0.5029, "step": 7586 }, { "epoch": 1.9202733485193622, "grad_norm": 0.17060992121696472, "learning_rate": 7.802143957002686e-06, "loss": 0.4727, "step": 7587 }, { "epoch": 1.9205264490002532, "grad_norm": 0.16924887895584106, "learning_rate": 7.799808241459696e-06, "loss": 0.4824, "step": 7588 }, { "epoch": 1.9207795494811442, "grad_norm": 0.17316707968711853, "learning_rate": 7.79747265204903e-06, "loss": 0.5057, "step": 7589 }, { "epoch": 1.921032649962035, "grad_norm": 0.16828739643096924, "learning_rate": 7.795137188904585e-06, "loss": 0.4966, "step": 7590 }, { "epoch": 1.9212857504429257, "grad_norm": 0.1684328019618988, "learning_rate": 7.792801852160248e-06, "loss": 0.4556, "step": 7591 }, { "epoch": 1.9215388509238167, "grad_norm": 0.17403481900691986, "learning_rate": 7.790466641949897e-06, "loss": 0.4693, "step": 7592 }, { "epoch": 1.9217919514047077, "grad_norm": 0.1664360612630844, "learning_rate": 7.788131558407411e-06, "loss": 0.4642, "step": 7593 }, { "epoch": 1.9220450518855987, "grad_norm": 0.1743192821741104, "learning_rate": 7.785796601666647e-06, "loss": 0.4999, "step": 7594 }, { "epoch": 1.9222981523664895, "grad_norm": 0.17105965316295624, "learning_rate": 7.783461771861468e-06, "loss": 0.502, "step": 7595 }, { "epoch": 1.9225512528473803, "grad_norm": 0.17292706668376923, "learning_rate": 7.781127069125723e-06, "loss": 0.4981, "step": 7596 }, { "epoch": 1.9228043533282713, "grad_norm": 0.17340172827243805, "learning_rate": 7.778792493593257e-06, "loss": 0.4851, "step": 7597 }, { "epoch": 1.9230574538091623, "grad_norm": 0.17009900510311127, "learning_rate": 7.776458045397904e-06, "loss": 0.4637, "step": 7598 }, { "epoch": 1.9233105542900533, "grad_norm": 0.16297782957553864, "learning_rate": 7.774123724673499e-06, "loss": 0.4636, "step": 7599 }, { "epoch": 1.923563654770944, "grad_norm": 0.17058652639389038, "learning_rate": 7.771789531553858e-06, "loss": 0.4682, "step": 7600 }, { "epoch": 1.9238167552518348, "grad_norm": 0.16979745030403137, "learning_rate": 7.769455466172791e-06, "loss": 0.477, "step": 7601 }, { "epoch": 1.9240698557327258, "grad_norm": 0.17038284242153168, "learning_rate": 7.767121528664114e-06, "loss": 0.4935, "step": 7602 }, { "epoch": 1.9243229562136168, "grad_norm": 0.17250414192676544, "learning_rate": 7.764787719161625e-06, "loss": 0.5001, "step": 7603 }, { "epoch": 1.9245760566945078, "grad_norm": 0.1913619041442871, "learning_rate": 7.762454037799116e-06, "loss": 0.4835, "step": 7604 }, { "epoch": 1.9248291571753986, "grad_norm": 0.16355861723423004, "learning_rate": 7.760120484710367e-06, "loss": 0.5021, "step": 7605 }, { "epoch": 1.9250822576562896, "grad_norm": 0.16368979215621948, "learning_rate": 7.75778706002916e-06, "loss": 0.48, "step": 7606 }, { "epoch": 1.9253353581371804, "grad_norm": 0.17397764325141907, "learning_rate": 7.755453763889264e-06, "loss": 0.478, "step": 7607 }, { "epoch": 1.9255884586180714, "grad_norm": 0.18587905168533325, "learning_rate": 7.753120596424443e-06, "loss": 0.4742, "step": 7608 }, { "epoch": 1.9258415590989624, "grad_norm": 0.17380116879940033, "learning_rate": 7.750787557768453e-06, "loss": 0.4892, "step": 7609 }, { "epoch": 1.9260946595798532, "grad_norm": 0.1733115166425705, "learning_rate": 7.748454648055043e-06, "loss": 0.4986, "step": 7610 }, { "epoch": 1.9263477600607442, "grad_norm": 0.16379114985466003, "learning_rate": 7.74612186741795e-06, "loss": 0.4871, "step": 7611 }, { "epoch": 1.926600860541635, "grad_norm": 0.1665450930595398, "learning_rate": 7.743789215990908e-06, "loss": 0.4675, "step": 7612 }, { "epoch": 1.926853961022526, "grad_norm": 0.17246536910533905, "learning_rate": 7.741456693907646e-06, "loss": 0.4779, "step": 7613 }, { "epoch": 1.927107061503417, "grad_norm": 0.16827593743801117, "learning_rate": 7.739124301301877e-06, "loss": 0.4636, "step": 7614 }, { "epoch": 1.927360161984308, "grad_norm": 0.1644127517938614, "learning_rate": 7.736792038307324e-06, "loss": 0.4818, "step": 7615 }, { "epoch": 1.9276132624651987, "grad_norm": 0.16788238286972046, "learning_rate": 7.734459905057675e-06, "loss": 0.4817, "step": 7616 }, { "epoch": 1.9278663629460895, "grad_norm": 0.17108166217803955, "learning_rate": 7.732127901686637e-06, "loss": 0.4681, "step": 7617 }, { "epoch": 1.9281194634269805, "grad_norm": 0.16698578000068665, "learning_rate": 7.729796028327895e-06, "loss": 0.4865, "step": 7618 }, { "epoch": 1.9283725639078715, "grad_norm": 0.16242370009422302, "learning_rate": 7.727464285115132e-06, "loss": 0.4641, "step": 7619 }, { "epoch": 1.9286256643887625, "grad_norm": 0.16846536099910736, "learning_rate": 7.725132672182017e-06, "loss": 0.4552, "step": 7620 }, { "epoch": 1.9288787648696533, "grad_norm": 0.17502997815608978, "learning_rate": 7.722801189662228e-06, "loss": 0.4686, "step": 7621 }, { "epoch": 1.929131865350544, "grad_norm": 0.47229158878326416, "learning_rate": 7.720469837689412e-06, "loss": 0.4782, "step": 7622 }, { "epoch": 1.929384965831435, "grad_norm": 0.16991984844207764, "learning_rate": 7.718138616397221e-06, "loss": 0.477, "step": 7623 }, { "epoch": 1.929638066312326, "grad_norm": 0.1710294485092163, "learning_rate": 7.715807525919309e-06, "loss": 0.4544, "step": 7624 }, { "epoch": 1.929891166793217, "grad_norm": 0.17150866985321045, "learning_rate": 7.713476566389301e-06, "loss": 0.4928, "step": 7625 }, { "epoch": 1.9301442672741078, "grad_norm": 0.16729570925235748, "learning_rate": 7.711145737940834e-06, "loss": 0.4763, "step": 7626 }, { "epoch": 1.9303973677549986, "grad_norm": 0.17020000517368317, "learning_rate": 7.708815040707528e-06, "loss": 0.503, "step": 7627 }, { "epoch": 1.9306504682358896, "grad_norm": 0.1769431233406067, "learning_rate": 7.706484474822995e-06, "loss": 0.4829, "step": 7628 }, { "epoch": 1.9309035687167806, "grad_norm": 0.16917797923088074, "learning_rate": 7.70415404042084e-06, "loss": 0.4893, "step": 7629 }, { "epoch": 1.9311566691976716, "grad_norm": 0.17175111174583435, "learning_rate": 7.701823737634664e-06, "loss": 0.5017, "step": 7630 }, { "epoch": 1.9314097696785624, "grad_norm": 0.16869279742240906, "learning_rate": 7.699493566598057e-06, "loss": 0.4733, "step": 7631 }, { "epoch": 1.9316628701594531, "grad_norm": 0.17365644872188568, "learning_rate": 7.69716352744461e-06, "loss": 0.4726, "step": 7632 }, { "epoch": 1.9319159706403441, "grad_norm": 0.16650114953517914, "learning_rate": 7.694833620307887e-06, "loss": 0.4659, "step": 7633 }, { "epoch": 1.9321690711212351, "grad_norm": 0.16720078885555267, "learning_rate": 7.692503845321466e-06, "loss": 0.4693, "step": 7634 }, { "epoch": 1.9324221716021261, "grad_norm": 0.16674861311912537, "learning_rate": 7.690174202618902e-06, "loss": 0.4578, "step": 7635 }, { "epoch": 1.932675272083017, "grad_norm": 0.16818995773792267, "learning_rate": 7.687844692333752e-06, "loss": 0.4829, "step": 7636 }, { "epoch": 1.932928372563908, "grad_norm": 0.17244726419448853, "learning_rate": 7.68551531459956e-06, "loss": 0.4838, "step": 7637 }, { "epoch": 1.9331814730447987, "grad_norm": 0.17176634073257446, "learning_rate": 7.68318606954987e-06, "loss": 0.4666, "step": 7638 }, { "epoch": 1.9334345735256897, "grad_norm": 0.17422066628932953, "learning_rate": 7.680856957318205e-06, "loss": 0.4924, "step": 7639 }, { "epoch": 1.9336876740065807, "grad_norm": 0.17838844656944275, "learning_rate": 7.678527978038092e-06, "loss": 0.512, "step": 7640 }, { "epoch": 1.9339407744874715, "grad_norm": 0.1730373352766037, "learning_rate": 7.676199131843045e-06, "loss": 0.5004, "step": 7641 }, { "epoch": 1.9341938749683625, "grad_norm": 0.17755882441997528, "learning_rate": 7.67387041886657e-06, "loss": 0.4885, "step": 7642 }, { "epoch": 1.9344469754492533, "grad_norm": 0.17091375589370728, "learning_rate": 7.671541839242175e-06, "loss": 0.5065, "step": 7643 }, { "epoch": 1.9347000759301443, "grad_norm": 0.16261939704418182, "learning_rate": 7.669213393103346e-06, "loss": 0.4475, "step": 7644 }, { "epoch": 1.9349531764110353, "grad_norm": 0.17048093676567078, "learning_rate": 7.666885080583565e-06, "loss": 0.4782, "step": 7645 }, { "epoch": 1.9352062768919263, "grad_norm": 0.1619502454996109, "learning_rate": 7.664556901816315e-06, "loss": 0.4477, "step": 7646 }, { "epoch": 1.935459377372817, "grad_norm": 0.1657402515411377, "learning_rate": 7.662228856935065e-06, "loss": 0.4948, "step": 7647 }, { "epoch": 1.9357124778537078, "grad_norm": 0.17473958432674408, "learning_rate": 7.659900946073275e-06, "loss": 0.4881, "step": 7648 }, { "epoch": 1.9359655783345988, "grad_norm": 0.17052216827869415, "learning_rate": 7.657573169364401e-06, "loss": 0.522, "step": 7649 }, { "epoch": 1.9362186788154898, "grad_norm": 0.18563111126422882, "learning_rate": 7.655245526941887e-06, "loss": 0.4802, "step": 7650 }, { "epoch": 1.9364717792963808, "grad_norm": 0.16869883239269257, "learning_rate": 7.65291801893917e-06, "loss": 0.5197, "step": 7651 }, { "epoch": 1.9367248797772716, "grad_norm": 0.17313377559185028, "learning_rate": 7.650590645489689e-06, "loss": 0.4846, "step": 7652 }, { "epoch": 1.9369779802581624, "grad_norm": 0.16845481097698212, "learning_rate": 7.64826340672686e-06, "loss": 0.5014, "step": 7653 }, { "epoch": 1.9372310807390534, "grad_norm": 0.1740826517343521, "learning_rate": 7.645936302784105e-06, "loss": 0.4978, "step": 7654 }, { "epoch": 1.9374841812199444, "grad_norm": 0.17022646963596344, "learning_rate": 7.643609333794826e-06, "loss": 0.4821, "step": 7655 }, { "epoch": 1.9377372817008354, "grad_norm": 0.16922326385974884, "learning_rate": 7.641282499892425e-06, "loss": 0.4561, "step": 7656 }, { "epoch": 1.9379903821817261, "grad_norm": 0.17195171117782593, "learning_rate": 7.638955801210294e-06, "loss": 0.4837, "step": 7657 }, { "epoch": 1.938243482662617, "grad_norm": 0.17176537215709686, "learning_rate": 7.636629237881817e-06, "loss": 0.472, "step": 7658 }, { "epoch": 1.938496583143508, "grad_norm": 0.16527897119522095, "learning_rate": 7.634302810040375e-06, "loss": 0.4934, "step": 7659 }, { "epoch": 1.938749683624399, "grad_norm": 0.17107507586479187, "learning_rate": 7.631976517819337e-06, "loss": 0.4967, "step": 7660 }, { "epoch": 1.93900278410529, "grad_norm": 0.17294839024543762, "learning_rate": 7.629650361352059e-06, "loss": 0.4739, "step": 7661 }, { "epoch": 1.9392558845861807, "grad_norm": 0.17344288527965546, "learning_rate": 7.627324340771899e-06, "loss": 0.4821, "step": 7662 }, { "epoch": 1.9395089850670715, "grad_norm": 0.17177030444145203, "learning_rate": 7.6249984562122006e-06, "loss": 0.4863, "step": 7663 }, { "epoch": 1.9397620855479625, "grad_norm": 0.1702365279197693, "learning_rate": 7.622672707806301e-06, "loss": 0.5018, "step": 7664 }, { "epoch": 1.9400151860288535, "grad_norm": 0.16822798550128937, "learning_rate": 7.620347095687539e-06, "loss": 0.4776, "step": 7665 }, { "epoch": 1.9402682865097445, "grad_norm": 0.17144788801670074, "learning_rate": 7.6180216199892256e-06, "loss": 0.498, "step": 7666 }, { "epoch": 1.9405213869906353, "grad_norm": 0.18506507575511932, "learning_rate": 7.615696280844678e-06, "loss": 0.4722, "step": 7667 }, { "epoch": 1.940774487471526, "grad_norm": 0.16786013543605804, "learning_rate": 7.613371078387206e-06, "loss": 0.4647, "step": 7668 }, { "epoch": 1.941027587952417, "grad_norm": 0.17265526950359344, "learning_rate": 7.611046012750107e-06, "loss": 0.4839, "step": 7669 }, { "epoch": 1.941280688433308, "grad_norm": 0.17328564822673798, "learning_rate": 7.6087210840666745e-06, "loss": 0.4792, "step": 7670 }, { "epoch": 1.941533788914199, "grad_norm": 0.17175732553005219, "learning_rate": 7.606396292470191e-06, "loss": 0.4637, "step": 7671 }, { "epoch": 1.9417868893950898, "grad_norm": 0.16723895072937012, "learning_rate": 7.604071638093929e-06, "loss": 0.4813, "step": 7672 }, { "epoch": 1.9420399898759808, "grad_norm": 0.17080406844615936, "learning_rate": 7.601747121071155e-06, "loss": 0.4996, "step": 7673 }, { "epoch": 1.9422930903568716, "grad_norm": 0.17222335934638977, "learning_rate": 7.599422741535133e-06, "loss": 0.4752, "step": 7674 }, { "epoch": 1.9425461908377626, "grad_norm": 0.17241591215133667, "learning_rate": 7.5970984996191136e-06, "loss": 0.4842, "step": 7675 }, { "epoch": 1.9427992913186536, "grad_norm": 0.17263348400592804, "learning_rate": 7.5947743954563425e-06, "loss": 0.4937, "step": 7676 }, { "epoch": 1.9430523917995444, "grad_norm": 0.16948461532592773, "learning_rate": 7.592450429180051e-06, "loss": 0.4839, "step": 7677 }, { "epoch": 1.9433054922804354, "grad_norm": 0.16345185041427612, "learning_rate": 7.590126600923469e-06, "loss": 0.4759, "step": 7678 }, { "epoch": 1.9435585927613261, "grad_norm": 0.16810891032218933, "learning_rate": 7.587802910819817e-06, "loss": 0.485, "step": 7679 }, { "epoch": 1.9438116932422171, "grad_norm": 0.1669209897518158, "learning_rate": 7.5854793590023054e-06, "loss": 0.4778, "step": 7680 }, { "epoch": 1.9440647937231081, "grad_norm": 0.1732999086380005, "learning_rate": 7.5831559456041414e-06, "loss": 0.4762, "step": 7681 }, { "epoch": 1.9443178942039991, "grad_norm": 0.1713452786207199, "learning_rate": 7.580832670758524e-06, "loss": 0.4905, "step": 7682 }, { "epoch": 1.94457099468489, "grad_norm": 0.1646720916032791, "learning_rate": 7.578509534598636e-06, "loss": 0.452, "step": 7683 }, { "epoch": 1.9448240951657807, "grad_norm": 0.17206113040447235, "learning_rate": 7.576186537257659e-06, "loss": 0.5153, "step": 7684 }, { "epoch": 1.9450771956466717, "grad_norm": 0.1695532351732254, "learning_rate": 7.5738636788687655e-06, "loss": 0.4688, "step": 7685 }, { "epoch": 1.9453302961275627, "grad_norm": 0.16682331264019012, "learning_rate": 7.57154095956512e-06, "loss": 0.4748, "step": 7686 }, { "epoch": 1.9455833966084537, "grad_norm": 0.17253154516220093, "learning_rate": 7.569218379479882e-06, "loss": 0.4656, "step": 7687 }, { "epoch": 1.9458364970893445, "grad_norm": 0.17449717223644257, "learning_rate": 7.566895938746202e-06, "loss": 0.4681, "step": 7688 }, { "epoch": 1.9460895975702353, "grad_norm": 0.1664774864912033, "learning_rate": 7.56457363749721e-06, "loss": 0.4568, "step": 7689 }, { "epoch": 1.9463426980511263, "grad_norm": 0.17276425659656525, "learning_rate": 7.562251475866047e-06, "loss": 0.4833, "step": 7690 }, { "epoch": 1.9465957985320173, "grad_norm": 0.17048262059688568, "learning_rate": 7.559929453985837e-06, "loss": 0.51, "step": 7691 }, { "epoch": 1.9468488990129083, "grad_norm": 0.1708894670009613, "learning_rate": 7.557607571989694e-06, "loss": 0.485, "step": 7692 }, { "epoch": 1.947101999493799, "grad_norm": 0.17778252065181732, "learning_rate": 7.555285830010733e-06, "loss": 0.4989, "step": 7693 }, { "epoch": 1.9473550999746898, "grad_norm": 0.17726394534111023, "learning_rate": 7.552964228182045e-06, "loss": 0.4907, "step": 7694 }, { "epoch": 1.9476082004555808, "grad_norm": 0.17050319910049438, "learning_rate": 7.5506427666367245e-06, "loss": 0.4836, "step": 7695 }, { "epoch": 1.9478613009364718, "grad_norm": 0.16960790753364563, "learning_rate": 7.5483214455078615e-06, "loss": 0.4898, "step": 7696 }, { "epoch": 1.9481144014173628, "grad_norm": 0.168160542845726, "learning_rate": 7.546000264928529e-06, "loss": 0.4682, "step": 7697 }, { "epoch": 1.9483675018982536, "grad_norm": 0.17243355512619019, "learning_rate": 7.5436792250317966e-06, "loss": 0.504, "step": 7698 }, { "epoch": 1.9486206023791444, "grad_norm": 0.16911885142326355, "learning_rate": 7.541358325950725e-06, "loss": 0.4735, "step": 7699 }, { "epoch": 1.9488737028600354, "grad_norm": 0.17921356856822968, "learning_rate": 7.539037567818364e-06, "loss": 0.4894, "step": 7700 }, { "epoch": 1.9491268033409264, "grad_norm": 0.17159800231456757, "learning_rate": 7.5367169507677575e-06, "loss": 0.4605, "step": 7701 }, { "epoch": 1.9493799038218174, "grad_norm": 0.16815003752708435, "learning_rate": 7.534396474931942e-06, "loss": 0.4927, "step": 7702 }, { "epoch": 1.9496330043027081, "grad_norm": 0.1655822992324829, "learning_rate": 7.532076140443949e-06, "loss": 0.4881, "step": 7703 }, { "epoch": 1.9498861047835991, "grad_norm": 0.1674162894487381, "learning_rate": 7.529755947436796e-06, "loss": 0.5025, "step": 7704 }, { "epoch": 1.95013920526449, "grad_norm": 0.1719149947166443, "learning_rate": 7.527435896043494e-06, "loss": 0.4913, "step": 7705 }, { "epoch": 1.950392305745381, "grad_norm": 0.170358344912529, "learning_rate": 7.525115986397047e-06, "loss": 0.4704, "step": 7706 }, { "epoch": 1.950645406226272, "grad_norm": 0.1700442135334015, "learning_rate": 7.522796218630451e-06, "loss": 0.4678, "step": 7707 }, { "epoch": 1.9508985067071627, "grad_norm": 0.17494124174118042, "learning_rate": 7.520476592876691e-06, "loss": 0.4917, "step": 7708 }, { "epoch": 1.9511516071880537, "grad_norm": 0.16626161336898804, "learning_rate": 7.518157109268749e-06, "loss": 0.4744, "step": 7709 }, { "epoch": 1.9514047076689445, "grad_norm": 0.16524098813533783, "learning_rate": 7.515837767939601e-06, "loss": 0.4699, "step": 7710 }, { "epoch": 1.9516578081498355, "grad_norm": 0.16865390539169312, "learning_rate": 7.5135185690221966e-06, "loss": 0.4797, "step": 7711 }, { "epoch": 1.9519109086307265, "grad_norm": 0.16876739263534546, "learning_rate": 7.511199512649502e-06, "loss": 0.4811, "step": 7712 }, { "epoch": 1.9521640091116175, "grad_norm": 0.16907908022403717, "learning_rate": 7.508880598954459e-06, "loss": 0.4945, "step": 7713 }, { "epoch": 1.9524171095925082, "grad_norm": 0.16976334154605865, "learning_rate": 7.506561828070007e-06, "loss": 0.4733, "step": 7714 }, { "epoch": 1.952670210073399, "grad_norm": 0.17002898454666138, "learning_rate": 7.504243200129078e-06, "loss": 0.4874, "step": 7715 }, { "epoch": 1.95292331055429, "grad_norm": 0.16643787920475006, "learning_rate": 7.50192471526459e-06, "loss": 0.4814, "step": 7716 }, { "epoch": 1.953176411035181, "grad_norm": 0.1763046681880951, "learning_rate": 7.499606373609458e-06, "loss": 0.49, "step": 7717 }, { "epoch": 1.953429511516072, "grad_norm": 0.17257030308246613, "learning_rate": 7.49728817529659e-06, "loss": 0.4889, "step": 7718 }, { "epoch": 1.9536826119969628, "grad_norm": 0.1719270646572113, "learning_rate": 7.494970120458882e-06, "loss": 0.4961, "step": 7719 }, { "epoch": 1.9539357124778536, "grad_norm": 0.1698017567396164, "learning_rate": 7.492652209229223e-06, "loss": 0.4779, "step": 7720 }, { "epoch": 1.9541888129587446, "grad_norm": 0.16674113273620605, "learning_rate": 7.4903344417404965e-06, "loss": 0.4601, "step": 7721 }, { "epoch": 1.9544419134396356, "grad_norm": 0.167949378490448, "learning_rate": 7.488016818125572e-06, "loss": 0.5042, "step": 7722 }, { "epoch": 1.9546950139205266, "grad_norm": 0.17170612514019012, "learning_rate": 7.485699338517314e-06, "loss": 0.5085, "step": 7723 }, { "epoch": 1.9549481144014174, "grad_norm": 0.17367015779018402, "learning_rate": 7.483382003048576e-06, "loss": 0.5082, "step": 7724 }, { "epoch": 1.9552012148823081, "grad_norm": 0.16987574100494385, "learning_rate": 7.481064811852212e-06, "loss": 0.4596, "step": 7725 }, { "epoch": 1.9554543153631991, "grad_norm": 0.16515661776065826, "learning_rate": 7.478747765061063e-06, "loss": 0.4902, "step": 7726 }, { "epoch": 1.9557074158440901, "grad_norm": 0.1761130839586258, "learning_rate": 7.476430862807953e-06, "loss": 0.489, "step": 7727 }, { "epoch": 1.9559605163249811, "grad_norm": 0.17370308935642242, "learning_rate": 7.474114105225708e-06, "loss": 0.4855, "step": 7728 }, { "epoch": 1.956213616805872, "grad_norm": 0.1790476143360138, "learning_rate": 7.471797492447144e-06, "loss": 0.4626, "step": 7729 }, { "epoch": 1.9564667172867627, "grad_norm": 0.1678432673215866, "learning_rate": 7.469481024605068e-06, "loss": 0.5002, "step": 7730 }, { "epoch": 1.9567198177676537, "grad_norm": 0.1687813550233841, "learning_rate": 7.4671647018322726e-06, "loss": 0.4928, "step": 7731 }, { "epoch": 1.9569729182485447, "grad_norm": 0.19283978641033173, "learning_rate": 7.46484852426156e-06, "loss": 0.4764, "step": 7732 }, { "epoch": 1.9572260187294357, "grad_norm": 0.16796201467514038, "learning_rate": 7.462532492025696e-06, "loss": 0.4688, "step": 7733 }, { "epoch": 1.9574791192103265, "grad_norm": 0.176426500082016, "learning_rate": 7.460216605257465e-06, "loss": 0.4701, "step": 7734 }, { "epoch": 1.9577322196912175, "grad_norm": 0.17226792871952057, "learning_rate": 7.457900864089629e-06, "loss": 0.4814, "step": 7735 }, { "epoch": 1.9579853201721082, "grad_norm": 0.16734755039215088, "learning_rate": 7.455585268654943e-06, "loss": 0.4685, "step": 7736 }, { "epoch": 1.9582384206529992, "grad_norm": 0.17431394755840302, "learning_rate": 7.453269819086159e-06, "loss": 0.4679, "step": 7737 }, { "epoch": 1.9584915211338902, "grad_norm": 0.1699412316083908, "learning_rate": 7.450954515516011e-06, "loss": 0.4775, "step": 7738 }, { "epoch": 1.958744621614781, "grad_norm": 0.1690680831670761, "learning_rate": 7.4486393580772324e-06, "loss": 0.4854, "step": 7739 }, { "epoch": 1.958997722095672, "grad_norm": 0.16900946199893951, "learning_rate": 7.446324346902549e-06, "loss": 0.4667, "step": 7740 }, { "epoch": 1.9592508225765628, "grad_norm": 0.1700851321220398, "learning_rate": 7.444009482124674e-06, "loss": 0.4892, "step": 7741 }, { "epoch": 1.9595039230574538, "grad_norm": 0.17137254774570465, "learning_rate": 7.441694763876312e-06, "loss": 0.5125, "step": 7742 }, { "epoch": 1.9597570235383448, "grad_norm": 0.17604368925094604, "learning_rate": 7.439380192290167e-06, "loss": 0.4731, "step": 7743 }, { "epoch": 1.9600101240192358, "grad_norm": 0.1753542423248291, "learning_rate": 7.437065767498921e-06, "loss": 0.496, "step": 7744 }, { "epoch": 1.9602632245001266, "grad_norm": 0.17464949190616608, "learning_rate": 7.434751489635257e-06, "loss": 0.4631, "step": 7745 }, { "epoch": 1.9605163249810174, "grad_norm": 0.1641547530889511, "learning_rate": 7.432437358831848e-06, "loss": 0.4498, "step": 7746 }, { "epoch": 1.9607694254619084, "grad_norm": 0.16465897858142853, "learning_rate": 7.4301233752213585e-06, "loss": 0.451, "step": 7747 }, { "epoch": 1.9610225259427994, "grad_norm": 0.1697414219379425, "learning_rate": 7.427809538936448e-06, "loss": 0.5066, "step": 7748 }, { "epoch": 1.9612756264236904, "grad_norm": 0.1715935915708542, "learning_rate": 7.425495850109759e-06, "loss": 0.4814, "step": 7749 }, { "epoch": 1.9615287269045811, "grad_norm": 0.17282819747924805, "learning_rate": 7.423182308873931e-06, "loss": 0.4598, "step": 7750 }, { "epoch": 1.961781827385472, "grad_norm": 0.1759367138147354, "learning_rate": 7.420868915361595e-06, "loss": 0.4755, "step": 7751 }, { "epoch": 1.962034927866363, "grad_norm": 0.17530009150505066, "learning_rate": 7.418555669705374e-06, "loss": 0.4685, "step": 7752 }, { "epoch": 1.962288028347254, "grad_norm": 0.1727498322725296, "learning_rate": 7.416242572037877e-06, "loss": 0.494, "step": 7753 }, { "epoch": 1.962541128828145, "grad_norm": 0.1698807179927826, "learning_rate": 7.41392962249172e-06, "loss": 0.472, "step": 7754 }, { "epoch": 1.9627942293090357, "grad_norm": 0.1699642539024353, "learning_rate": 7.411616821199486e-06, "loss": 0.4844, "step": 7755 }, { "epoch": 1.9630473297899265, "grad_norm": 0.18269236385822296, "learning_rate": 7.409304168293772e-06, "loss": 0.4946, "step": 7756 }, { "epoch": 1.9633004302708175, "grad_norm": 0.17967644333839417, "learning_rate": 7.406991663907152e-06, "loss": 0.4499, "step": 7757 }, { "epoch": 1.9635535307517085, "grad_norm": 0.17006640136241913, "learning_rate": 7.4046793081722004e-06, "loss": 0.479, "step": 7758 }, { "epoch": 1.9638066312325995, "grad_norm": 0.2126229703426361, "learning_rate": 7.402367101221477e-06, "loss": 0.5068, "step": 7759 }, { "epoch": 1.9640597317134902, "grad_norm": 0.1665487438440323, "learning_rate": 7.400055043187544e-06, "loss": 0.4653, "step": 7760 }, { "epoch": 1.964312832194381, "grad_norm": 0.1756371259689331, "learning_rate": 7.397743134202936e-06, "loss": 0.5165, "step": 7761 }, { "epoch": 1.964565932675272, "grad_norm": 0.16646040976047516, "learning_rate": 7.395431374400193e-06, "loss": 0.4889, "step": 7762 }, { "epoch": 1.964819033156163, "grad_norm": 0.17115746438503265, "learning_rate": 7.393119763911845e-06, "loss": 0.4873, "step": 7763 }, { "epoch": 1.965072133637054, "grad_norm": 0.1689155101776123, "learning_rate": 7.390808302870411e-06, "loss": 0.4909, "step": 7764 }, { "epoch": 1.9653252341179448, "grad_norm": 0.1687479019165039, "learning_rate": 7.388496991408405e-06, "loss": 0.4714, "step": 7765 }, { "epoch": 1.9655783345988356, "grad_norm": 0.17523878812789917, "learning_rate": 7.386185829658324e-06, "loss": 0.4906, "step": 7766 }, { "epoch": 1.9658314350797266, "grad_norm": 0.16528458893299103, "learning_rate": 7.383874817752665e-06, "loss": 0.4814, "step": 7767 }, { "epoch": 1.9660845355606176, "grad_norm": 0.17824117839336395, "learning_rate": 7.381563955823912e-06, "loss": 0.505, "step": 7768 }, { "epoch": 1.9663376360415086, "grad_norm": 0.17091062664985657, "learning_rate": 7.379253244004545e-06, "loss": 0.4905, "step": 7769 }, { "epoch": 1.9665907365223994, "grad_norm": 0.17094939947128296, "learning_rate": 7.376942682427031e-06, "loss": 0.4964, "step": 7770 }, { "epoch": 1.9668438370032904, "grad_norm": 0.17458947002887726, "learning_rate": 7.37463227122383e-06, "loss": 0.4857, "step": 7771 }, { "epoch": 1.9670969374841811, "grad_norm": 0.17055171728134155, "learning_rate": 7.372322010527389e-06, "loss": 0.5003, "step": 7772 }, { "epoch": 1.9673500379650721, "grad_norm": 0.16873601078987122, "learning_rate": 7.370011900470153e-06, "loss": 0.4654, "step": 7773 }, { "epoch": 1.9676031384459631, "grad_norm": 0.16694213449954987, "learning_rate": 7.367701941184557e-06, "loss": 0.4697, "step": 7774 }, { "epoch": 1.967856238926854, "grad_norm": 0.17316193878650665, "learning_rate": 7.365392132803023e-06, "loss": 0.5096, "step": 7775 }, { "epoch": 1.968109339407745, "grad_norm": 0.16837778687477112, "learning_rate": 7.363082475457975e-06, "loss": 0.4731, "step": 7776 }, { "epoch": 1.9683624398886357, "grad_norm": 0.16997238993644714, "learning_rate": 7.36077296928181e-06, "loss": 0.4576, "step": 7777 }, { "epoch": 1.9686155403695267, "grad_norm": 0.17248405516147614, "learning_rate": 7.3584636144069336e-06, "loss": 0.4892, "step": 7778 }, { "epoch": 1.9688686408504177, "grad_norm": 0.17071548104286194, "learning_rate": 7.356154410965735e-06, "loss": 0.4876, "step": 7779 }, { "epoch": 1.9691217413313087, "grad_norm": 0.1947275847196579, "learning_rate": 7.353845359090594e-06, "loss": 0.4602, "step": 7780 }, { "epoch": 1.9693748418121995, "grad_norm": 0.169552743434906, "learning_rate": 7.351536458913886e-06, "loss": 0.4668, "step": 7781 }, { "epoch": 1.9696279422930902, "grad_norm": 0.16909201443195343, "learning_rate": 7.34922771056798e-06, "loss": 0.4876, "step": 7782 }, { "epoch": 1.9698810427739812, "grad_norm": 0.16459205746650696, "learning_rate": 7.346919114185223e-06, "loss": 0.47, "step": 7783 }, { "epoch": 1.9701341432548722, "grad_norm": 0.16818086802959442, "learning_rate": 7.344610669897961e-06, "loss": 0.4874, "step": 7784 }, { "epoch": 1.9703872437357632, "grad_norm": 0.1776019036769867, "learning_rate": 7.34230237783854e-06, "loss": 0.4973, "step": 7785 }, { "epoch": 1.970640344216654, "grad_norm": 0.1724284440279007, "learning_rate": 7.3399942381392875e-06, "loss": 0.4796, "step": 7786 }, { "epoch": 1.9708934446975448, "grad_norm": 0.18032467365264893, "learning_rate": 7.337686250932523e-06, "loss": 0.5166, "step": 7787 }, { "epoch": 1.9711465451784358, "grad_norm": 0.17817050218582153, "learning_rate": 7.335378416350557e-06, "loss": 0.4845, "step": 7788 }, { "epoch": 1.9713996456593268, "grad_norm": 0.1682683676481247, "learning_rate": 7.333070734525693e-06, "loss": 0.4647, "step": 7789 }, { "epoch": 1.9716527461402178, "grad_norm": 0.2656105160713196, "learning_rate": 7.330763205590226e-06, "loss": 0.4813, "step": 7790 }, { "epoch": 1.9719058466211086, "grad_norm": 0.16904041171073914, "learning_rate": 7.328455829676444e-06, "loss": 0.4942, "step": 7791 }, { "epoch": 1.9721589471019993, "grad_norm": 0.16295331716537476, "learning_rate": 7.326148606916622e-06, "loss": 0.4694, "step": 7792 }, { "epoch": 1.9724120475828903, "grad_norm": 0.17512497305870056, "learning_rate": 7.323841537443031e-06, "loss": 0.4619, "step": 7793 }, { "epoch": 1.9726651480637813, "grad_norm": 0.19458550214767456, "learning_rate": 7.321534621387924e-06, "loss": 0.4987, "step": 7794 }, { "epoch": 1.9729182485446723, "grad_norm": 0.171921506524086, "learning_rate": 7.319227858883557e-06, "loss": 0.5022, "step": 7795 }, { "epoch": 1.9731713490255631, "grad_norm": 0.17293868958950043, "learning_rate": 7.316921250062171e-06, "loss": 0.4553, "step": 7796 }, { "epoch": 1.973424449506454, "grad_norm": 0.1851501315832138, "learning_rate": 7.3146147950559955e-06, "loss": 0.4823, "step": 7797 }, { "epoch": 1.973677549987345, "grad_norm": 0.18048734962940216, "learning_rate": 7.312308493997263e-06, "loss": 0.5007, "step": 7798 }, { "epoch": 1.973930650468236, "grad_norm": 0.17449572682380676, "learning_rate": 7.310002347018178e-06, "loss": 0.4723, "step": 7799 }, { "epoch": 1.974183750949127, "grad_norm": 0.16618280112743378, "learning_rate": 7.307696354250955e-06, "loss": 0.4698, "step": 7800 }, { "epoch": 1.9744368514300177, "grad_norm": 0.1716269850730896, "learning_rate": 7.3053905158277875e-06, "loss": 0.4939, "step": 7801 }, { "epoch": 1.9746899519109087, "grad_norm": 0.2356242835521698, "learning_rate": 7.303084831880867e-06, "loss": 0.4756, "step": 7802 }, { "epoch": 1.9749430523917995, "grad_norm": 0.17089657485485077, "learning_rate": 7.300779302542369e-06, "loss": 0.4779, "step": 7803 }, { "epoch": 1.9751961528726905, "grad_norm": 0.1697448492050171, "learning_rate": 7.2984739279444736e-06, "loss": 0.4591, "step": 7804 }, { "epoch": 1.9754492533535815, "grad_norm": 0.1663224697113037, "learning_rate": 7.296168708219334e-06, "loss": 0.4917, "step": 7805 }, { "epoch": 1.9757023538344722, "grad_norm": 0.1733512431383133, "learning_rate": 7.293863643499106e-06, "loss": 0.4797, "step": 7806 }, { "epoch": 1.9759554543153632, "grad_norm": 0.18238262832164764, "learning_rate": 7.291558733915936e-06, "loss": 0.4953, "step": 7807 }, { "epoch": 1.976208554796254, "grad_norm": 0.17210574448108673, "learning_rate": 7.289253979601958e-06, "loss": 0.461, "step": 7808 }, { "epoch": 1.976461655277145, "grad_norm": 0.1772162914276123, "learning_rate": 7.286949380689301e-06, "loss": 0.4901, "step": 7809 }, { "epoch": 1.976714755758036, "grad_norm": 0.17083695530891418, "learning_rate": 7.2846449373100805e-06, "loss": 0.5097, "step": 7810 }, { "epoch": 1.976967856238927, "grad_norm": 0.17093893885612488, "learning_rate": 7.2823406495964045e-06, "loss": 0.4897, "step": 7811 }, { "epoch": 1.9772209567198178, "grad_norm": 0.16902898252010345, "learning_rate": 7.280036517680373e-06, "loss": 0.4862, "step": 7812 }, { "epoch": 1.9774740572007086, "grad_norm": 0.16860605776309967, "learning_rate": 7.27773254169408e-06, "loss": 0.5039, "step": 7813 }, { "epoch": 1.9777271576815996, "grad_norm": 0.17112544178962708, "learning_rate": 7.275428721769606e-06, "loss": 0.503, "step": 7814 }, { "epoch": 1.9779802581624906, "grad_norm": 0.16388621926307678, "learning_rate": 7.273125058039025e-06, "loss": 0.4832, "step": 7815 }, { "epoch": 1.9782333586433816, "grad_norm": 0.17668947577476501, "learning_rate": 7.270821550634399e-06, "loss": 0.4551, "step": 7816 }, { "epoch": 1.9784864591242723, "grad_norm": 0.17168816924095154, "learning_rate": 7.268518199687784e-06, "loss": 0.4828, "step": 7817 }, { "epoch": 1.9787395596051631, "grad_norm": 0.1699429750442505, "learning_rate": 7.266215005331226e-06, "loss": 0.4833, "step": 7818 }, { "epoch": 1.9789926600860541, "grad_norm": 0.17266318202018738, "learning_rate": 7.263911967696762e-06, "loss": 0.4851, "step": 7819 }, { "epoch": 1.9792457605669451, "grad_norm": 0.17335090041160583, "learning_rate": 7.261609086916421e-06, "loss": 0.483, "step": 7820 }, { "epoch": 1.9794988610478361, "grad_norm": 0.16773931682109833, "learning_rate": 7.259306363122227e-06, "loss": 0.483, "step": 7821 }, { "epoch": 1.979751961528727, "grad_norm": 0.16867688298225403, "learning_rate": 7.257003796446181e-06, "loss": 0.4784, "step": 7822 }, { "epoch": 1.9800050620096177, "grad_norm": 0.17558002471923828, "learning_rate": 7.25470138702029e-06, "loss": 0.5026, "step": 7823 }, { "epoch": 1.9802581624905087, "grad_norm": 0.1704372763633728, "learning_rate": 7.252399134976544e-06, "loss": 0.4528, "step": 7824 }, { "epoch": 1.9805112629713997, "grad_norm": 0.1693730354309082, "learning_rate": 7.2500970404469265e-06, "loss": 0.4597, "step": 7825 }, { "epoch": 1.9807643634522907, "grad_norm": 0.1698480099439621, "learning_rate": 7.2477951035634175e-06, "loss": 0.4755, "step": 7826 }, { "epoch": 1.9810174639331815, "grad_norm": 0.1673423945903778, "learning_rate": 7.245493324457975e-06, "loss": 0.4683, "step": 7827 }, { "epoch": 1.9812705644140722, "grad_norm": 0.1713915914297104, "learning_rate": 7.2431917032625536e-06, "loss": 0.4674, "step": 7828 }, { "epoch": 1.9815236648949632, "grad_norm": 0.1741952747106552, "learning_rate": 7.240890240109106e-06, "loss": 0.4742, "step": 7829 }, { "epoch": 1.9817767653758542, "grad_norm": 0.17436432838439941, "learning_rate": 7.238588935129568e-06, "loss": 0.4872, "step": 7830 }, { "epoch": 1.9820298658567452, "grad_norm": 0.16921257972717285, "learning_rate": 7.236287788455869e-06, "loss": 0.4492, "step": 7831 }, { "epoch": 1.982282966337636, "grad_norm": 0.18082329630851746, "learning_rate": 7.233986800219931e-06, "loss": 0.4885, "step": 7832 }, { "epoch": 1.982536066818527, "grad_norm": 0.1757500022649765, "learning_rate": 7.23168597055366e-06, "loss": 0.4796, "step": 7833 }, { "epoch": 1.9827891672994178, "grad_norm": 0.1766796112060547, "learning_rate": 7.229385299588958e-06, "loss": 0.492, "step": 7834 }, { "epoch": 1.9830422677803088, "grad_norm": 0.1687791645526886, "learning_rate": 7.227084787457721e-06, "loss": 0.4879, "step": 7835 }, { "epoch": 1.9832953682611998, "grad_norm": 0.1716112345457077, "learning_rate": 7.224784434291832e-06, "loss": 0.4903, "step": 7836 }, { "epoch": 1.9835484687420906, "grad_norm": 0.16838473081588745, "learning_rate": 7.222484240223166e-06, "loss": 0.4644, "step": 7837 }, { "epoch": 1.9838015692229816, "grad_norm": 0.16852068901062012, "learning_rate": 7.220184205383587e-06, "loss": 0.4701, "step": 7838 }, { "epoch": 1.9840546697038723, "grad_norm": 0.16758541762828827, "learning_rate": 7.217884329904947e-06, "loss": 0.4422, "step": 7839 }, { "epoch": 1.9843077701847633, "grad_norm": 0.1731233149766922, "learning_rate": 7.2155846139191e-06, "loss": 0.495, "step": 7840 }, { "epoch": 1.9845608706656543, "grad_norm": 0.176176518201828, "learning_rate": 7.2132850575578774e-06, "loss": 0.4799, "step": 7841 }, { "epoch": 1.9848139711465453, "grad_norm": 0.1670072376728058, "learning_rate": 7.210985660953113e-06, "loss": 0.4919, "step": 7842 }, { "epoch": 1.9850670716274361, "grad_norm": 0.17476221919059753, "learning_rate": 7.208686424236626e-06, "loss": 0.4829, "step": 7843 }, { "epoch": 1.985320172108327, "grad_norm": 0.17929011583328247, "learning_rate": 7.206387347540225e-06, "loss": 0.4883, "step": 7844 }, { "epoch": 1.985573272589218, "grad_norm": 0.17134016752243042, "learning_rate": 7.204088430995709e-06, "loss": 0.4752, "step": 7845 }, { "epoch": 1.985826373070109, "grad_norm": 0.1691657304763794, "learning_rate": 7.201789674734874e-06, "loss": 0.4901, "step": 7846 }, { "epoch": 1.986079473551, "grad_norm": 0.23019546270370483, "learning_rate": 7.199491078889499e-06, "loss": 0.4865, "step": 7847 }, { "epoch": 1.9863325740318907, "grad_norm": 0.16982614994049072, "learning_rate": 7.197192643591366e-06, "loss": 0.4716, "step": 7848 }, { "epoch": 1.9865856745127815, "grad_norm": 0.16605906188488007, "learning_rate": 7.19489436897223e-06, "loss": 0.4562, "step": 7849 }, { "epoch": 1.9868387749936725, "grad_norm": 0.1885465532541275, "learning_rate": 7.192596255163846e-06, "loss": 0.4778, "step": 7850 }, { "epoch": 1.9870918754745635, "grad_norm": 0.16680894792079926, "learning_rate": 7.190298302297966e-06, "loss": 0.4821, "step": 7851 }, { "epoch": 1.9873449759554545, "grad_norm": 0.16924439370632172, "learning_rate": 7.188000510506324e-06, "loss": 0.4646, "step": 7852 }, { "epoch": 1.9875980764363452, "grad_norm": 0.1857837736606598, "learning_rate": 7.185702879920648e-06, "loss": 0.4981, "step": 7853 }, { "epoch": 1.987851176917236, "grad_norm": 0.1644161343574524, "learning_rate": 7.183405410672659e-06, "loss": 0.4723, "step": 7854 }, { "epoch": 1.988104277398127, "grad_norm": 0.17322340607643127, "learning_rate": 7.181108102894061e-06, "loss": 0.4721, "step": 7855 }, { "epoch": 1.988357377879018, "grad_norm": 0.17664334177970886, "learning_rate": 7.178810956716553e-06, "loss": 0.4684, "step": 7856 }, { "epoch": 1.988610478359909, "grad_norm": 0.16898873448371887, "learning_rate": 7.17651397227183e-06, "loss": 0.4845, "step": 7857 }, { "epoch": 1.9888635788407998, "grad_norm": 0.17118552327156067, "learning_rate": 7.174217149691574e-06, "loss": 0.5019, "step": 7858 }, { "epoch": 1.9891166793216906, "grad_norm": 0.16932256519794464, "learning_rate": 7.171920489107455e-06, "loss": 0.4728, "step": 7859 }, { "epoch": 1.9893697798025816, "grad_norm": 0.17379392683506012, "learning_rate": 7.169623990651134e-06, "loss": 0.441, "step": 7860 }, { "epoch": 1.9896228802834726, "grad_norm": 0.17178630828857422, "learning_rate": 7.1673276544542635e-06, "loss": 0.4863, "step": 7861 }, { "epoch": 1.9898759807643636, "grad_norm": 0.16900111734867096, "learning_rate": 7.1650314806484925e-06, "loss": 0.4734, "step": 7862 }, { "epoch": 1.9901290812452543, "grad_norm": 0.1710006445646286, "learning_rate": 7.16273546936545e-06, "loss": 0.4812, "step": 7863 }, { "epoch": 1.9903821817261451, "grad_norm": 0.17205290496349335, "learning_rate": 7.160439620736766e-06, "loss": 0.4725, "step": 7864 }, { "epoch": 1.9906352822070361, "grad_norm": 0.16720491647720337, "learning_rate": 7.158143934894057e-06, "loss": 0.4639, "step": 7865 }, { "epoch": 1.9908883826879271, "grad_norm": 0.17250779271125793, "learning_rate": 7.155848411968927e-06, "loss": 0.4789, "step": 7866 }, { "epoch": 1.9911414831688181, "grad_norm": 0.17676378786563873, "learning_rate": 7.1535530520929745e-06, "loss": 0.4933, "step": 7867 }, { "epoch": 1.991394583649709, "grad_norm": 0.17130261659622192, "learning_rate": 7.151257855397786e-06, "loss": 0.4853, "step": 7868 }, { "epoch": 1.9916476841306, "grad_norm": 0.16831089556217194, "learning_rate": 7.148962822014941e-06, "loss": 0.4812, "step": 7869 }, { "epoch": 1.9919007846114907, "grad_norm": 0.17391735315322876, "learning_rate": 7.1466679520760165e-06, "loss": 0.4815, "step": 7870 }, { "epoch": 1.9921538850923817, "grad_norm": 0.17403116822242737, "learning_rate": 7.144373245712559e-06, "loss": 0.5036, "step": 7871 }, { "epoch": 1.9924069855732727, "grad_norm": 0.16603845357894897, "learning_rate": 7.142078703056125e-06, "loss": 0.4616, "step": 7872 }, { "epoch": 1.9926600860541634, "grad_norm": 0.17155759036540985, "learning_rate": 7.1397843242382595e-06, "loss": 0.473, "step": 7873 }, { "epoch": 1.9929131865350544, "grad_norm": 0.1685040444135666, "learning_rate": 7.137490109390491e-06, "loss": 0.4571, "step": 7874 }, { "epoch": 1.9931662870159452, "grad_norm": 0.17067363858222961, "learning_rate": 7.135196058644343e-06, "loss": 0.4749, "step": 7875 }, { "epoch": 1.9934193874968362, "grad_norm": 0.1850992739200592, "learning_rate": 7.13290217213133e-06, "loss": 0.5013, "step": 7876 }, { "epoch": 1.9936724879777272, "grad_norm": 0.1732708066701889, "learning_rate": 7.130608449982951e-06, "loss": 0.4832, "step": 7877 }, { "epoch": 1.9939255884586182, "grad_norm": 0.1754414141178131, "learning_rate": 7.128314892330701e-06, "loss": 0.4648, "step": 7878 }, { "epoch": 1.994178688939509, "grad_norm": 0.17169566452503204, "learning_rate": 7.126021499306068e-06, "loss": 0.4725, "step": 7879 }, { "epoch": 1.9944317894203998, "grad_norm": 0.17234745621681213, "learning_rate": 7.123728271040526e-06, "loss": 0.4877, "step": 7880 }, { "epoch": 1.9946848899012908, "grad_norm": 0.17092259228229523, "learning_rate": 7.121435207665544e-06, "loss": 0.4577, "step": 7881 }, { "epoch": 1.9949379903821818, "grad_norm": 0.1767023801803589, "learning_rate": 7.119142309312575e-06, "loss": 0.4844, "step": 7882 }, { "epoch": 1.9951910908630728, "grad_norm": 0.17654553055763245, "learning_rate": 7.116849576113063e-06, "loss": 0.5016, "step": 7883 }, { "epoch": 1.9954441913439636, "grad_norm": 0.1699686497449875, "learning_rate": 7.114557008198451e-06, "loss": 0.4734, "step": 7884 }, { "epoch": 1.9956972918248543, "grad_norm": 0.17229993641376495, "learning_rate": 7.112264605700163e-06, "loss": 0.4714, "step": 7885 }, { "epoch": 1.9959503923057453, "grad_norm": 0.1922907531261444, "learning_rate": 7.10997236874962e-06, "loss": 0.4795, "step": 7886 }, { "epoch": 1.9962034927866363, "grad_norm": 0.17770697176456451, "learning_rate": 7.1076802974782345e-06, "loss": 0.4889, "step": 7887 }, { "epoch": 1.9964565932675273, "grad_norm": 0.17118380963802338, "learning_rate": 7.1053883920174e-06, "loss": 0.4639, "step": 7888 }, { "epoch": 1.9967096937484181, "grad_norm": 0.21772247552871704, "learning_rate": 7.103096652498507e-06, "loss": 0.4756, "step": 7889 }, { "epoch": 1.996962794229309, "grad_norm": 0.16622787714004517, "learning_rate": 7.100805079052939e-06, "loss": 0.4805, "step": 7890 }, { "epoch": 1.9972158947102, "grad_norm": 0.1662263721227646, "learning_rate": 7.098513671812067e-06, "loss": 0.4924, "step": 7891 }, { "epoch": 1.997468995191091, "grad_norm": 0.1698465198278427, "learning_rate": 7.096222430907248e-06, "loss": 0.4549, "step": 7892 }, { "epoch": 1.997722095671982, "grad_norm": 0.17456361651420593, "learning_rate": 7.093931356469844e-06, "loss": 0.4705, "step": 7893 }, { "epoch": 1.9979751961528727, "grad_norm": 0.1585163027048111, "learning_rate": 7.0916404486311854e-06, "loss": 0.466, "step": 7894 }, { "epoch": 1.9982282966337634, "grad_norm": 0.17032741010189056, "learning_rate": 7.089349707522612e-06, "loss": 0.469, "step": 7895 }, { "epoch": 1.9984813971146544, "grad_norm": 0.1910613775253296, "learning_rate": 7.087059133275444e-06, "loss": 0.4795, "step": 7896 }, { "epoch": 1.9987344975955454, "grad_norm": 0.1704208254814148, "learning_rate": 7.0847687260209986e-06, "loss": 0.4637, "step": 7897 }, { "epoch": 1.9989875980764364, "grad_norm": 0.16853463649749756, "learning_rate": 7.082478485890581e-06, "loss": 0.4557, "step": 7898 }, { "epoch": 1.9992406985573272, "grad_norm": 0.17389437556266785, "learning_rate": 7.080188413015479e-06, "loss": 0.4866, "step": 7899 }, { "epoch": 1.9994937990382182, "grad_norm": 0.16971814632415771, "learning_rate": 7.077898507526981e-06, "loss": 0.4846, "step": 7900 }, { "epoch": 1.999746899519109, "grad_norm": 0.16869160532951355, "learning_rate": 7.075608769556365e-06, "loss": 0.4651, "step": 7901 }, { "epoch": 2.0, "grad_norm": 0.17073746025562286, "learning_rate": 7.0733191992348945e-06, "loss": 0.4715, "step": 7902 }, { "epoch": 2.0, "eval_loss": 0.8005701303482056, "eval_runtime": 1059.4216, "eval_samples_per_second": 40.134, "eval_steps_per_second": 0.628, "step": 7902 }, { "epoch": 2.000253100480891, "grad_norm": 0.17606516182422638, "learning_rate": 7.0710297966938265e-06, "loss": 0.489, "step": 7903 }, { "epoch": 2.000506200961782, "grad_norm": 0.17560985684394836, "learning_rate": 7.0687405620644085e-06, "loss": 0.4957, "step": 7904 }, { "epoch": 2.0007593014426726, "grad_norm": 0.1686098873615265, "learning_rate": 7.066451495477875e-06, "loss": 0.4659, "step": 7905 }, { "epoch": 2.0010124019235636, "grad_norm": 0.16744129359722137, "learning_rate": 7.0641625970654534e-06, "loss": 0.4785, "step": 7906 }, { "epoch": 2.0012655024044546, "grad_norm": 0.17453601956367493, "learning_rate": 7.061873866958362e-06, "loss": 0.4899, "step": 7907 }, { "epoch": 2.0015186028853456, "grad_norm": 0.15870890021324158, "learning_rate": 7.0595853052878104e-06, "loss": 0.478, "step": 7908 }, { "epoch": 2.0017717033662366, "grad_norm": 0.17182306945323944, "learning_rate": 7.057296912184999e-06, "loss": 0.4825, "step": 7909 }, { "epoch": 2.002024803847127, "grad_norm": 0.17487576603889465, "learning_rate": 7.055008687781109e-06, "loss": 0.4867, "step": 7910 }, { "epoch": 2.002277904328018, "grad_norm": 0.18192631006240845, "learning_rate": 7.052720632207326e-06, "loss": 0.4896, "step": 7911 }, { "epoch": 2.002531004808909, "grad_norm": 0.16572974622249603, "learning_rate": 7.050432745594816e-06, "loss": 0.4787, "step": 7912 }, { "epoch": 2.0027841052898, "grad_norm": 0.16614460945129395, "learning_rate": 7.048145028074741e-06, "loss": 0.4711, "step": 7913 }, { "epoch": 2.003037205770691, "grad_norm": 0.17189592123031616, "learning_rate": 7.045857479778247e-06, "loss": 0.4853, "step": 7914 }, { "epoch": 2.0032903062515817, "grad_norm": 0.17135126888751984, "learning_rate": 7.043570100836484e-06, "loss": 0.4832, "step": 7915 }, { "epoch": 2.0035434067324727, "grad_norm": 0.17231567203998566, "learning_rate": 7.04128289138057e-06, "loss": 0.4532, "step": 7916 }, { "epoch": 2.0037965072133637, "grad_norm": 0.16857820749282837, "learning_rate": 7.038995851541632e-06, "loss": 0.5001, "step": 7917 }, { "epoch": 2.0040496076942547, "grad_norm": 0.17024247348308563, "learning_rate": 7.0367089814507835e-06, "loss": 0.4675, "step": 7918 }, { "epoch": 2.0043027081751457, "grad_norm": 0.16839346289634705, "learning_rate": 7.034422281239121e-06, "loss": 0.4793, "step": 7919 }, { "epoch": 2.0045558086560367, "grad_norm": 0.17464515566825867, "learning_rate": 7.032135751037742e-06, "loss": 0.4637, "step": 7920 }, { "epoch": 2.004808909136927, "grad_norm": 0.17479833960533142, "learning_rate": 7.029849390977721e-06, "loss": 0.4785, "step": 7921 }, { "epoch": 2.005062009617818, "grad_norm": 0.1767544001340866, "learning_rate": 7.027563201190135e-06, "loss": 0.4705, "step": 7922 }, { "epoch": 2.005315110098709, "grad_norm": 0.1720743626356125, "learning_rate": 7.025277181806044e-06, "loss": 0.4728, "step": 7923 }, { "epoch": 2.0055682105796, "grad_norm": 0.1742267906665802, "learning_rate": 7.022991332956503e-06, "loss": 0.4714, "step": 7924 }, { "epoch": 2.005821311060491, "grad_norm": 0.17634157836437225, "learning_rate": 7.020705654772555e-06, "loss": 0.4804, "step": 7925 }, { "epoch": 2.0060744115413818, "grad_norm": 0.17270693182945251, "learning_rate": 7.018420147385234e-06, "loss": 0.4822, "step": 7926 }, { "epoch": 2.0063275120222728, "grad_norm": 0.17614296078681946, "learning_rate": 7.016134810925559e-06, "loss": 0.4581, "step": 7927 }, { "epoch": 2.0065806125031638, "grad_norm": 0.16815364360809326, "learning_rate": 7.013849645524544e-06, "loss": 0.4754, "step": 7928 }, { "epoch": 2.0068337129840548, "grad_norm": 0.1704464852809906, "learning_rate": 7.011564651313194e-06, "loss": 0.4601, "step": 7929 }, { "epoch": 2.0070868134649458, "grad_norm": 0.1764889508485794, "learning_rate": 7.009279828422506e-06, "loss": 0.4535, "step": 7930 }, { "epoch": 2.0073399139458363, "grad_norm": 0.17234817147254944, "learning_rate": 7.006995176983462e-06, "loss": 0.469, "step": 7931 }, { "epoch": 2.0075930144267273, "grad_norm": 0.1695815771818161, "learning_rate": 7.004710697127035e-06, "loss": 0.4829, "step": 7932 }, { "epoch": 2.0078461149076183, "grad_norm": 0.1747860312461853, "learning_rate": 7.002426388984189e-06, "loss": 0.4809, "step": 7933 }, { "epoch": 2.0080992153885093, "grad_norm": 0.17773112654685974, "learning_rate": 7.0001422526858795e-06, "loss": 0.4981, "step": 7934 }, { "epoch": 2.0083523158694003, "grad_norm": 0.1719023734331131, "learning_rate": 6.997858288363052e-06, "loss": 0.4712, "step": 7935 }, { "epoch": 2.008605416350291, "grad_norm": 0.17243513464927673, "learning_rate": 6.995574496146638e-06, "loss": 0.4904, "step": 7936 }, { "epoch": 2.008858516831182, "grad_norm": 0.17226307094097137, "learning_rate": 6.993290876167571e-06, "loss": 0.4795, "step": 7937 }, { "epoch": 2.009111617312073, "grad_norm": 0.16962721943855286, "learning_rate": 6.9910074285567555e-06, "loss": 0.4605, "step": 7938 }, { "epoch": 2.009364717792964, "grad_norm": 0.17149601876735687, "learning_rate": 6.988724153445101e-06, "loss": 0.4684, "step": 7939 }, { "epoch": 2.009617818273855, "grad_norm": 0.16895705461502075, "learning_rate": 6.9864410509635036e-06, "loss": 0.4921, "step": 7940 }, { "epoch": 2.0098709187547454, "grad_norm": 0.17153093218803406, "learning_rate": 6.98415812124285e-06, "loss": 0.4583, "step": 7941 }, { "epoch": 2.0101240192356364, "grad_norm": 0.1698998510837555, "learning_rate": 6.98187536441401e-06, "loss": 0.4529, "step": 7942 }, { "epoch": 2.0103771197165274, "grad_norm": 0.1747225672006607, "learning_rate": 6.97959278060786e-06, "loss": 0.4906, "step": 7943 }, { "epoch": 2.0106302201974184, "grad_norm": 0.1689155250787735, "learning_rate": 6.9773103699552456e-06, "loss": 0.4736, "step": 7944 }, { "epoch": 2.0108833206783094, "grad_norm": 0.17067337036132812, "learning_rate": 6.975028132587013e-06, "loss": 0.4524, "step": 7945 }, { "epoch": 2.0111364211592, "grad_norm": 0.1770194172859192, "learning_rate": 6.972746068634003e-06, "loss": 0.4937, "step": 7946 }, { "epoch": 2.011389521640091, "grad_norm": 0.174177885055542, "learning_rate": 6.970464178227039e-06, "loss": 0.4746, "step": 7947 }, { "epoch": 2.011642622120982, "grad_norm": 0.1684405356645584, "learning_rate": 6.968182461496939e-06, "loss": 0.4771, "step": 7948 }, { "epoch": 2.011895722601873, "grad_norm": 0.16968780755996704, "learning_rate": 6.965900918574507e-06, "loss": 0.4795, "step": 7949 }, { "epoch": 2.012148823082764, "grad_norm": 0.17200466990470886, "learning_rate": 6.963619549590538e-06, "loss": 0.4601, "step": 7950 }, { "epoch": 2.0124019235636545, "grad_norm": 0.16832265257835388, "learning_rate": 6.961338354675818e-06, "loss": 0.4522, "step": 7951 }, { "epoch": 2.0126550240445455, "grad_norm": 0.1731673777103424, "learning_rate": 6.959057333961126e-06, "loss": 0.4679, "step": 7952 }, { "epoch": 2.0129081245254365, "grad_norm": 0.1750822514295578, "learning_rate": 6.956776487577226e-06, "loss": 0.4642, "step": 7953 }, { "epoch": 2.0131612250063275, "grad_norm": 0.17669685184955597, "learning_rate": 6.954495815654876e-06, "loss": 0.4856, "step": 7954 }, { "epoch": 2.0134143254872185, "grad_norm": 0.17956078052520752, "learning_rate": 6.95221531832482e-06, "loss": 0.4982, "step": 7955 }, { "epoch": 2.0136674259681095, "grad_norm": 0.167373925447464, "learning_rate": 6.9499349957177945e-06, "loss": 0.4928, "step": 7956 }, { "epoch": 2.013920526449, "grad_norm": 0.18790937960147858, "learning_rate": 6.947654847964525e-06, "loss": 0.4758, "step": 7957 }, { "epoch": 2.014173626929891, "grad_norm": 0.3147646188735962, "learning_rate": 6.945374875195727e-06, "loss": 0.4895, "step": 7958 }, { "epoch": 2.014426727410782, "grad_norm": 0.1709897816181183, "learning_rate": 6.943095077542113e-06, "loss": 0.4696, "step": 7959 }, { "epoch": 2.014679827891673, "grad_norm": 0.16950860619544983, "learning_rate": 6.940815455134369e-06, "loss": 0.4696, "step": 7960 }, { "epoch": 2.014932928372564, "grad_norm": 0.17215237021446228, "learning_rate": 6.938536008103187e-06, "loss": 0.4592, "step": 7961 }, { "epoch": 2.0151860288534547, "grad_norm": 0.17149502038955688, "learning_rate": 6.936256736579241e-06, "loss": 0.4799, "step": 7962 }, { "epoch": 2.0154391293343457, "grad_norm": 0.16798707842826843, "learning_rate": 6.933977640693198e-06, "loss": 0.4614, "step": 7963 }, { "epoch": 2.0156922298152367, "grad_norm": 0.16898111999034882, "learning_rate": 6.93169872057571e-06, "loss": 0.4588, "step": 7964 }, { "epoch": 2.0159453302961277, "grad_norm": 0.16766640543937683, "learning_rate": 6.929419976357434e-06, "loss": 0.4751, "step": 7965 }, { "epoch": 2.0161984307770187, "grad_norm": 0.19388172030448914, "learning_rate": 6.927141408168991e-06, "loss": 0.4659, "step": 7966 }, { "epoch": 2.016451531257909, "grad_norm": 0.1662943959236145, "learning_rate": 6.924863016141012e-06, "loss": 0.457, "step": 7967 }, { "epoch": 2.0167046317388, "grad_norm": 0.17755648493766785, "learning_rate": 6.9225848004041155e-06, "loss": 0.4797, "step": 7968 }, { "epoch": 2.016957732219691, "grad_norm": 0.2007530778646469, "learning_rate": 6.9203067610889045e-06, "loss": 0.4831, "step": 7969 }, { "epoch": 2.017210832700582, "grad_norm": 0.18567396700382233, "learning_rate": 6.918028898325975e-06, "loss": 0.4732, "step": 7970 }, { "epoch": 2.017463933181473, "grad_norm": 0.9371480345726013, "learning_rate": 6.915751212245909e-06, "loss": 0.4543, "step": 7971 }, { "epoch": 2.0177170336623638, "grad_norm": 0.1711091548204422, "learning_rate": 6.913473702979285e-06, "loss": 0.4553, "step": 7972 }, { "epoch": 2.0179701341432548, "grad_norm": 0.1820170134305954, "learning_rate": 6.911196370656663e-06, "loss": 0.4789, "step": 7973 }, { "epoch": 2.0182232346241458, "grad_norm": 0.17041128873825073, "learning_rate": 6.908919215408605e-06, "loss": 0.4431, "step": 7974 }, { "epoch": 2.0184763351050368, "grad_norm": 0.1699371039867401, "learning_rate": 6.90664223736565e-06, "loss": 0.4673, "step": 7975 }, { "epoch": 2.0187294355859278, "grad_norm": 0.17314539849758148, "learning_rate": 6.904365436658336e-06, "loss": 0.4533, "step": 7976 }, { "epoch": 2.0189825360668183, "grad_norm": 0.20987823605537415, "learning_rate": 6.902088813417183e-06, "loss": 0.4709, "step": 7977 }, { "epoch": 2.0192356365477093, "grad_norm": 0.17583873867988586, "learning_rate": 6.899812367772705e-06, "loss": 0.4667, "step": 7978 }, { "epoch": 2.0194887370286003, "grad_norm": 0.17184355854988098, "learning_rate": 6.8975360998554095e-06, "loss": 0.4662, "step": 7979 }, { "epoch": 2.0197418375094913, "grad_norm": 0.17255930602550507, "learning_rate": 6.895260009795786e-06, "loss": 0.4628, "step": 7980 }, { "epoch": 2.0199949379903823, "grad_norm": 0.17148908972740173, "learning_rate": 6.892984097724327e-06, "loss": 0.4648, "step": 7981 }, { "epoch": 2.020248038471273, "grad_norm": 0.18027715384960175, "learning_rate": 6.890708363771492e-06, "loss": 0.4601, "step": 7982 }, { "epoch": 2.020501138952164, "grad_norm": 0.1713658571243286, "learning_rate": 6.888432808067753e-06, "loss": 0.4575, "step": 7983 }, { "epoch": 2.020754239433055, "grad_norm": 0.17515158653259277, "learning_rate": 6.8861574307435604e-06, "loss": 0.474, "step": 7984 }, { "epoch": 2.021007339913946, "grad_norm": 0.1728339046239853, "learning_rate": 6.8838822319293585e-06, "loss": 0.4716, "step": 7985 }, { "epoch": 2.021260440394837, "grad_norm": 0.16640716791152954, "learning_rate": 6.881607211755574e-06, "loss": 0.5066, "step": 7986 }, { "epoch": 2.021513540875728, "grad_norm": 0.16746313869953156, "learning_rate": 6.879332370352642e-06, "loss": 0.4721, "step": 7987 }, { "epoch": 2.0217666413566184, "grad_norm": 0.16971342265605927, "learning_rate": 6.87705770785096e-06, "loss": 0.4423, "step": 7988 }, { "epoch": 2.0220197418375094, "grad_norm": 0.18279528617858887, "learning_rate": 6.874783224380932e-06, "loss": 0.4941, "step": 7989 }, { "epoch": 2.0222728423184004, "grad_norm": 0.1682359278202057, "learning_rate": 6.872508920072956e-06, "loss": 0.4677, "step": 7990 }, { "epoch": 2.0225259427992914, "grad_norm": 0.16643273830413818, "learning_rate": 6.8702347950574085e-06, "loss": 0.4516, "step": 7991 }, { "epoch": 2.0227790432801824, "grad_norm": 0.16890770196914673, "learning_rate": 6.867960849464664e-06, "loss": 0.4394, "step": 7992 }, { "epoch": 2.023032143761073, "grad_norm": 0.16875973343849182, "learning_rate": 6.865687083425078e-06, "loss": 0.4505, "step": 7993 }, { "epoch": 2.023285244241964, "grad_norm": 0.1718922108411789, "learning_rate": 6.863413497069002e-06, "loss": 0.4783, "step": 7994 }, { "epoch": 2.023538344722855, "grad_norm": 0.1752975434064865, "learning_rate": 6.861140090526776e-06, "loss": 0.4619, "step": 7995 }, { "epoch": 2.023791445203746, "grad_norm": 0.17571942508220673, "learning_rate": 6.858866863928733e-06, "loss": 0.4864, "step": 7996 }, { "epoch": 2.024044545684637, "grad_norm": 0.17293982207775116, "learning_rate": 6.856593817405188e-06, "loss": 0.4337, "step": 7997 }, { "epoch": 2.0242976461655275, "grad_norm": 0.16679759323596954, "learning_rate": 6.8543209510864525e-06, "loss": 0.4592, "step": 7998 }, { "epoch": 2.0245507466464185, "grad_norm": 0.1762828230857849, "learning_rate": 6.8520482651028246e-06, "loss": 0.462, "step": 7999 }, { "epoch": 2.0248038471273095, "grad_norm": 0.1707436591386795, "learning_rate": 6.8497757595845915e-06, "loss": 0.4697, "step": 8000 }, { "epoch": 2.0250569476082005, "grad_norm": 0.16866515576839447, "learning_rate": 6.84750343466203e-06, "loss": 0.4554, "step": 8001 }, { "epoch": 2.0253100480890915, "grad_norm": 0.17453482747077942, "learning_rate": 6.84523129046541e-06, "loss": 0.4542, "step": 8002 }, { "epoch": 2.025563148569982, "grad_norm": 0.17680677771568298, "learning_rate": 6.8429593271249915e-06, "loss": 0.464, "step": 8003 }, { "epoch": 2.025816249050873, "grad_norm": 0.16968657076358795, "learning_rate": 6.840687544771014e-06, "loss": 0.4952, "step": 8004 }, { "epoch": 2.026069349531764, "grad_norm": 0.1726396530866623, "learning_rate": 6.838415943533718e-06, "loss": 0.4973, "step": 8005 }, { "epoch": 2.026322450012655, "grad_norm": 0.16940627992153168, "learning_rate": 6.836144523543331e-06, "loss": 0.4434, "step": 8006 }, { "epoch": 2.026575550493546, "grad_norm": 0.17700399458408356, "learning_rate": 6.833873284930066e-06, "loss": 0.4488, "step": 8007 }, { "epoch": 2.0268286509744367, "grad_norm": 0.175448939204216, "learning_rate": 6.831602227824128e-06, "loss": 0.4768, "step": 8008 }, { "epoch": 2.0270817514553277, "grad_norm": 0.17096643149852753, "learning_rate": 6.82933135235572e-06, "loss": 0.4466, "step": 8009 }, { "epoch": 2.0273348519362187, "grad_norm": 0.17416810989379883, "learning_rate": 6.827060658655016e-06, "loss": 0.4713, "step": 8010 }, { "epoch": 2.0275879524171097, "grad_norm": 0.16602279245853424, "learning_rate": 6.8247901468521904e-06, "loss": 0.4644, "step": 8011 }, { "epoch": 2.0278410528980007, "grad_norm": 0.18132735788822174, "learning_rate": 6.822519817077413e-06, "loss": 0.4829, "step": 8012 }, { "epoch": 2.028094153378891, "grad_norm": 0.17020496726036072, "learning_rate": 6.820249669460834e-06, "loss": 0.4562, "step": 8013 }, { "epoch": 2.028347253859782, "grad_norm": 0.17114442586898804, "learning_rate": 6.817979704132597e-06, "loss": 0.4357, "step": 8014 }, { "epoch": 2.028600354340673, "grad_norm": 0.17222820222377777, "learning_rate": 6.8157099212228354e-06, "loss": 0.4472, "step": 8015 }, { "epoch": 2.028853454821564, "grad_norm": 0.1762465536594391, "learning_rate": 6.8134403208616676e-06, "loss": 0.4904, "step": 8016 }, { "epoch": 2.029106555302455, "grad_norm": 0.16961434483528137, "learning_rate": 6.811170903179204e-06, "loss": 0.4371, "step": 8017 }, { "epoch": 2.029359655783346, "grad_norm": 0.17021410167217255, "learning_rate": 6.808901668305551e-06, "loss": 0.4563, "step": 8018 }, { "epoch": 2.0296127562642368, "grad_norm": 0.17101165652275085, "learning_rate": 6.806632616370796e-06, "loss": 0.4441, "step": 8019 }, { "epoch": 2.0298658567451278, "grad_norm": 0.1768806129693985, "learning_rate": 6.804363747505021e-06, "loss": 0.4695, "step": 8020 }, { "epoch": 2.0301189572260188, "grad_norm": 0.1715802103281021, "learning_rate": 6.802095061838292e-06, "loss": 0.4218, "step": 8021 }, { "epoch": 2.0303720577069098, "grad_norm": 0.17042365670204163, "learning_rate": 6.799826559500669e-06, "loss": 0.4611, "step": 8022 }, { "epoch": 2.0306251581878008, "grad_norm": 0.17350633442401886, "learning_rate": 6.797558240622202e-06, "loss": 0.4871, "step": 8023 }, { "epoch": 2.0308782586686913, "grad_norm": 0.17287369072437286, "learning_rate": 6.7952901053329254e-06, "loss": 0.4734, "step": 8024 }, { "epoch": 2.0311313591495823, "grad_norm": 0.21521903574466705, "learning_rate": 6.79302215376287e-06, "loss": 0.4623, "step": 8025 }, { "epoch": 2.0313844596304733, "grad_norm": 0.17262089252471924, "learning_rate": 6.790754386042056e-06, "loss": 0.4902, "step": 8026 }, { "epoch": 2.0316375601113643, "grad_norm": 0.17832036316394806, "learning_rate": 6.788486802300482e-06, "loss": 0.485, "step": 8027 }, { "epoch": 2.0318906605922553, "grad_norm": 0.16940340399742126, "learning_rate": 6.7862194026681474e-06, "loss": 0.4743, "step": 8028 }, { "epoch": 2.032143761073146, "grad_norm": 0.17048649489879608, "learning_rate": 6.783952187275037e-06, "loss": 0.4643, "step": 8029 }, { "epoch": 2.032396861554037, "grad_norm": 0.173628032207489, "learning_rate": 6.781685156251123e-06, "loss": 0.4666, "step": 8030 }, { "epoch": 2.032649962034928, "grad_norm": 0.17721541225910187, "learning_rate": 6.77941830972638e-06, "loss": 0.4702, "step": 8031 }, { "epoch": 2.032903062515819, "grad_norm": 0.1676410585641861, "learning_rate": 6.7771516478307485e-06, "loss": 0.4459, "step": 8032 }, { "epoch": 2.03315616299671, "grad_norm": 0.17571000754833221, "learning_rate": 6.774885170694174e-06, "loss": 0.4332, "step": 8033 }, { "epoch": 2.0334092634776004, "grad_norm": 0.2012130469083786, "learning_rate": 6.772618878446595e-06, "loss": 0.4904, "step": 8034 }, { "epoch": 2.0336623639584914, "grad_norm": 0.17463375627994537, "learning_rate": 6.770352771217929e-06, "loss": 0.4658, "step": 8035 }, { "epoch": 2.0339154644393824, "grad_norm": 0.17269127070903778, "learning_rate": 6.768086849138088e-06, "loss": 0.4508, "step": 8036 }, { "epoch": 2.0341685649202734, "grad_norm": 0.1681426763534546, "learning_rate": 6.765821112336974e-06, "loss": 0.4627, "step": 8037 }, { "epoch": 2.0344216654011644, "grad_norm": 0.17850923538208008, "learning_rate": 6.763555560944473e-06, "loss": 0.4769, "step": 8038 }, { "epoch": 2.034674765882055, "grad_norm": 0.17215658724308014, "learning_rate": 6.761290195090464e-06, "loss": 0.4546, "step": 8039 }, { "epoch": 2.034927866362946, "grad_norm": 0.17277854681015015, "learning_rate": 6.759025014904821e-06, "loss": 0.4586, "step": 8040 }, { "epoch": 2.035180966843837, "grad_norm": 0.1760226935148239, "learning_rate": 6.756760020517399e-06, "loss": 0.4647, "step": 8041 }, { "epoch": 2.035434067324728, "grad_norm": 0.17879097163677216, "learning_rate": 6.754495212058046e-06, "loss": 0.4896, "step": 8042 }, { "epoch": 2.035687167805619, "grad_norm": 0.17053310573101044, "learning_rate": 6.7522305896565965e-06, "loss": 0.4438, "step": 8043 }, { "epoch": 2.0359402682865095, "grad_norm": 0.17094109952449799, "learning_rate": 6.74996615344288e-06, "loss": 0.4511, "step": 8044 }, { "epoch": 2.0361933687674005, "grad_norm": 0.17922094464302063, "learning_rate": 6.747701903546706e-06, "loss": 0.508, "step": 8045 }, { "epoch": 2.0364464692482915, "grad_norm": 0.1717858612537384, "learning_rate": 6.745437840097884e-06, "loss": 0.4277, "step": 8046 }, { "epoch": 2.0366995697291825, "grad_norm": 0.1737157553434372, "learning_rate": 6.743173963226208e-06, "loss": 0.4547, "step": 8047 }, { "epoch": 2.0369526702100735, "grad_norm": 0.17595550417900085, "learning_rate": 6.740910273061463e-06, "loss": 0.4512, "step": 8048 }, { "epoch": 2.0372057706909645, "grad_norm": 0.16973493993282318, "learning_rate": 6.738646769733414e-06, "loss": 0.4477, "step": 8049 }, { "epoch": 2.037458871171855, "grad_norm": 0.1727137416601181, "learning_rate": 6.736383453371831e-06, "loss": 0.4686, "step": 8050 }, { "epoch": 2.037711971652746, "grad_norm": 0.168095201253891, "learning_rate": 6.734120324106459e-06, "loss": 0.4551, "step": 8051 }, { "epoch": 2.037965072133637, "grad_norm": 0.17399710416793823, "learning_rate": 6.731857382067043e-06, "loss": 0.4311, "step": 8052 }, { "epoch": 2.038218172614528, "grad_norm": 0.1761341542005539, "learning_rate": 6.729594627383311e-06, "loss": 0.4745, "step": 8053 }, { "epoch": 2.038471273095419, "grad_norm": 0.17421405017375946, "learning_rate": 6.727332060184982e-06, "loss": 0.435, "step": 8054 }, { "epoch": 2.0387243735763096, "grad_norm": 0.17131267488002777, "learning_rate": 6.7250696806017605e-06, "loss": 0.4773, "step": 8055 }, { "epoch": 2.0389774740572006, "grad_norm": 0.17103464901447296, "learning_rate": 6.722807488763349e-06, "loss": 0.4588, "step": 8056 }, { "epoch": 2.0392305745380916, "grad_norm": 0.17236730456352234, "learning_rate": 6.720545484799432e-06, "loss": 0.4946, "step": 8057 }, { "epoch": 2.0394836750189826, "grad_norm": 0.16978387534618378, "learning_rate": 6.718283668839687e-06, "loss": 0.4786, "step": 8058 }, { "epoch": 2.0397367754998736, "grad_norm": 0.1761372983455658, "learning_rate": 6.71602204101378e-06, "loss": 0.4631, "step": 8059 }, { "epoch": 2.039989875980764, "grad_norm": 0.179939866065979, "learning_rate": 6.71376060145136e-06, "loss": 0.4684, "step": 8060 }, { "epoch": 2.040242976461655, "grad_norm": 0.17712362110614777, "learning_rate": 6.711499350282073e-06, "loss": 0.5098, "step": 8061 }, { "epoch": 2.040496076942546, "grad_norm": 0.17007271945476532, "learning_rate": 6.709238287635555e-06, "loss": 0.4792, "step": 8062 }, { "epoch": 2.040749177423437, "grad_norm": 0.1810372769832611, "learning_rate": 6.706977413641425e-06, "loss": 0.4627, "step": 8063 }, { "epoch": 2.041002277904328, "grad_norm": 0.1701544225215912, "learning_rate": 6.704716728429295e-06, "loss": 0.4391, "step": 8064 }, { "epoch": 2.0412553783852188, "grad_norm": 0.17354270815849304, "learning_rate": 6.7024562321287645e-06, "loss": 0.4576, "step": 8065 }, { "epoch": 2.0415084788661098, "grad_norm": 0.17570222914218903, "learning_rate": 6.7001959248694224e-06, "loss": 0.4704, "step": 8066 }, { "epoch": 2.0417615793470008, "grad_norm": 0.18311525881290436, "learning_rate": 6.697935806780849e-06, "loss": 0.4667, "step": 8067 }, { "epoch": 2.0420146798278918, "grad_norm": 0.1699714958667755, "learning_rate": 6.695675877992609e-06, "loss": 0.4415, "step": 8068 }, { "epoch": 2.0422677803087828, "grad_norm": 0.17903432250022888, "learning_rate": 6.693416138634263e-06, "loss": 0.4375, "step": 8069 }, { "epoch": 2.0425208807896733, "grad_norm": 0.17534539103507996, "learning_rate": 6.691156588835359e-06, "loss": 0.452, "step": 8070 }, { "epoch": 2.0427739812705643, "grad_norm": 0.17406289279460907, "learning_rate": 6.688897228725426e-06, "loss": 0.4498, "step": 8071 }, { "epoch": 2.0430270817514553, "grad_norm": 0.1678743213415146, "learning_rate": 6.686638058433992e-06, "loss": 0.4861, "step": 8072 }, { "epoch": 2.0432801822323463, "grad_norm": 0.17308692634105682, "learning_rate": 6.6843790780905694e-06, "loss": 0.4779, "step": 8073 }, { "epoch": 2.0435332827132373, "grad_norm": 0.17133846879005432, "learning_rate": 6.682120287824661e-06, "loss": 0.4778, "step": 8074 }, { "epoch": 2.043786383194128, "grad_norm": 0.17657551169395447, "learning_rate": 6.6798616877657566e-06, "loss": 0.4601, "step": 8075 }, { "epoch": 2.044039483675019, "grad_norm": 0.1697618067264557, "learning_rate": 6.677603278043343e-06, "loss": 0.4297, "step": 8076 }, { "epoch": 2.04429258415591, "grad_norm": 0.16647329926490784, "learning_rate": 6.675345058786882e-06, "loss": 0.4879, "step": 8077 }, { "epoch": 2.044545684636801, "grad_norm": 0.17124401032924652, "learning_rate": 6.673087030125838e-06, "loss": 0.4698, "step": 8078 }, { "epoch": 2.044798785117692, "grad_norm": 0.18118521571159363, "learning_rate": 6.670829192189656e-06, "loss": 0.4598, "step": 8079 }, { "epoch": 2.0450518855985824, "grad_norm": 0.17893216013908386, "learning_rate": 6.668571545107776e-06, "loss": 0.4597, "step": 8080 }, { "epoch": 2.0453049860794734, "grad_norm": 0.17922642827033997, "learning_rate": 6.666314089009625e-06, "loss": 0.4666, "step": 8081 }, { "epoch": 2.0455580865603644, "grad_norm": 0.16995470225811005, "learning_rate": 6.664056824024612e-06, "loss": 0.4656, "step": 8082 }, { "epoch": 2.0458111870412554, "grad_norm": 0.17121224105358124, "learning_rate": 6.6617997502821455e-06, "loss": 0.4633, "step": 8083 }, { "epoch": 2.0460642875221464, "grad_norm": 0.17743763327598572, "learning_rate": 6.659542867911617e-06, "loss": 0.4634, "step": 8084 }, { "epoch": 2.0463173880030374, "grad_norm": 0.17235533893108368, "learning_rate": 6.65728617704241e-06, "loss": 0.4878, "step": 8085 }, { "epoch": 2.046570488483928, "grad_norm": 0.16908346116542816, "learning_rate": 6.6550296778038984e-06, "loss": 0.4497, "step": 8086 }, { "epoch": 2.046823588964819, "grad_norm": 0.17423324286937714, "learning_rate": 6.65277337032544e-06, "loss": 0.4587, "step": 8087 }, { "epoch": 2.04707668944571, "grad_norm": 0.16956889629364014, "learning_rate": 6.650517254736383e-06, "loss": 0.4516, "step": 8088 }, { "epoch": 2.047329789926601, "grad_norm": 0.174799382686615, "learning_rate": 6.648261331166066e-06, "loss": 0.4496, "step": 8089 }, { "epoch": 2.047582890407492, "grad_norm": 0.17066386342048645, "learning_rate": 6.646005599743815e-06, "loss": 0.4598, "step": 8090 }, { "epoch": 2.0478359908883825, "grad_norm": 0.18845230340957642, "learning_rate": 6.643750060598952e-06, "loss": 0.4719, "step": 8091 }, { "epoch": 2.0480890913692735, "grad_norm": 0.1703442931175232, "learning_rate": 6.641494713860779e-06, "loss": 0.4698, "step": 8092 }, { "epoch": 2.0483421918501645, "grad_norm": 0.17254634201526642, "learning_rate": 6.6392395596585875e-06, "loss": 0.4683, "step": 8093 }, { "epoch": 2.0485952923310555, "grad_norm": 0.17093639075756073, "learning_rate": 6.636984598121664e-06, "loss": 0.4439, "step": 8094 }, { "epoch": 2.0488483928119465, "grad_norm": 0.17640197277069092, "learning_rate": 6.634729829379279e-06, "loss": 0.4472, "step": 8095 }, { "epoch": 2.049101493292837, "grad_norm": 0.1737445890903473, "learning_rate": 6.6324752535606955e-06, "loss": 0.4479, "step": 8096 }, { "epoch": 2.049354593773728, "grad_norm": 0.1778334230184555, "learning_rate": 6.630220870795159e-06, "loss": 0.4887, "step": 8097 }, { "epoch": 2.049607694254619, "grad_norm": 0.17307262122631073, "learning_rate": 6.627966681211919e-06, "loss": 0.4606, "step": 8098 }, { "epoch": 2.04986079473551, "grad_norm": 0.16956685483455658, "learning_rate": 6.6257126849401886e-06, "loss": 0.4553, "step": 8099 }, { "epoch": 2.050113895216401, "grad_norm": 0.17518068850040436, "learning_rate": 6.623458882109196e-06, "loss": 0.4541, "step": 8100 }, { "epoch": 2.0503669956972916, "grad_norm": 0.17090407013893127, "learning_rate": 6.6212052728481424e-06, "loss": 0.4738, "step": 8101 }, { "epoch": 2.0506200961781826, "grad_norm": 0.16844117641448975, "learning_rate": 6.618951857286224e-06, "loss": 0.4134, "step": 8102 }, { "epoch": 2.0508731966590736, "grad_norm": 0.1694728434085846, "learning_rate": 6.616698635552625e-06, "loss": 0.4697, "step": 8103 }, { "epoch": 2.0511262971399646, "grad_norm": 0.1799411177635193, "learning_rate": 6.614445607776514e-06, "loss": 0.4759, "step": 8104 }, { "epoch": 2.0513793976208556, "grad_norm": 0.19176387786865234, "learning_rate": 6.612192774087057e-06, "loss": 0.4632, "step": 8105 }, { "epoch": 2.051632498101746, "grad_norm": 0.17532293498516083, "learning_rate": 6.609940134613398e-06, "loss": 0.4505, "step": 8106 }, { "epoch": 2.051885598582637, "grad_norm": 0.1740732491016388, "learning_rate": 6.607687689484681e-06, "loss": 0.4805, "step": 8107 }, { "epoch": 2.052138699063528, "grad_norm": 0.17315161228179932, "learning_rate": 6.605435438830033e-06, "loss": 0.4393, "step": 8108 }, { "epoch": 2.052391799544419, "grad_norm": 0.1709533929824829, "learning_rate": 6.603183382778574e-06, "loss": 0.4552, "step": 8109 }, { "epoch": 2.05264490002531, "grad_norm": 0.17119541764259338, "learning_rate": 6.600931521459404e-06, "loss": 0.4601, "step": 8110 }, { "epoch": 2.0528980005062007, "grad_norm": 0.1721503585577011, "learning_rate": 6.59867985500162e-06, "loss": 0.4423, "step": 8111 }, { "epoch": 2.0531511009870917, "grad_norm": 0.1729585975408554, "learning_rate": 6.596428383534301e-06, "loss": 0.4578, "step": 8112 }, { "epoch": 2.0534042014679827, "grad_norm": 0.1751789003610611, "learning_rate": 6.5941771071865255e-06, "loss": 0.4698, "step": 8113 }, { "epoch": 2.0536573019488737, "grad_norm": 0.1751249134540558, "learning_rate": 6.591926026087358e-06, "loss": 0.4664, "step": 8114 }, { "epoch": 2.0539104024297647, "grad_norm": 0.1755913943052292, "learning_rate": 6.589675140365834e-06, "loss": 0.4507, "step": 8115 }, { "epoch": 2.0541635029106557, "grad_norm": 0.2018977403640747, "learning_rate": 6.587424450151003e-06, "loss": 0.4475, "step": 8116 }, { "epoch": 2.0544166033915463, "grad_norm": 0.17408645153045654, "learning_rate": 6.585173955571888e-06, "loss": 0.4609, "step": 8117 }, { "epoch": 2.0546697038724373, "grad_norm": 0.17483949661254883, "learning_rate": 6.582923656757509e-06, "loss": 0.4616, "step": 8118 }, { "epoch": 2.0549228043533283, "grad_norm": 0.17684024572372437, "learning_rate": 6.580673553836865e-06, "loss": 0.4766, "step": 8119 }, { "epoch": 2.0551759048342193, "grad_norm": 0.1758762001991272, "learning_rate": 6.578423646938958e-06, "loss": 0.4657, "step": 8120 }, { "epoch": 2.0554290053151103, "grad_norm": 0.17637470364570618, "learning_rate": 6.57617393619276e-06, "loss": 0.4641, "step": 8121 }, { "epoch": 2.055682105796001, "grad_norm": 0.1730562001466751, "learning_rate": 6.573924421727249e-06, "loss": 0.4516, "step": 8122 }, { "epoch": 2.055935206276892, "grad_norm": 0.17656387388706207, "learning_rate": 6.571675103671385e-06, "loss": 0.4619, "step": 8123 }, { "epoch": 2.056188306757783, "grad_norm": 0.17018459737300873, "learning_rate": 6.569425982154114e-06, "loss": 0.4762, "step": 8124 }, { "epoch": 2.056441407238674, "grad_norm": 0.2807812988758087, "learning_rate": 6.567177057304376e-06, "loss": 0.4487, "step": 8125 }, { "epoch": 2.056694507719565, "grad_norm": 0.16816486418247223, "learning_rate": 6.564928329251094e-06, "loss": 0.4502, "step": 8126 }, { "epoch": 2.0569476082004554, "grad_norm": 0.1724071353673935, "learning_rate": 6.562679798123184e-06, "loss": 0.4556, "step": 8127 }, { "epoch": 2.0572007086813464, "grad_norm": 0.17659837007522583, "learning_rate": 6.560431464049547e-06, "loss": 0.4674, "step": 8128 }, { "epoch": 2.0574538091622374, "grad_norm": 0.1752396523952484, "learning_rate": 6.55818332715908e-06, "loss": 0.4744, "step": 8129 }, { "epoch": 2.0577069096431284, "grad_norm": 0.17227807641029358, "learning_rate": 6.555935387580663e-06, "loss": 0.4623, "step": 8130 }, { "epoch": 2.0579600101240194, "grad_norm": 0.17688138782978058, "learning_rate": 6.553687645443167e-06, "loss": 0.431, "step": 8131 }, { "epoch": 2.05821311060491, "grad_norm": 0.17737579345703125, "learning_rate": 6.551440100875445e-06, "loss": 0.4435, "step": 8132 }, { "epoch": 2.058466211085801, "grad_norm": 0.17622019350528717, "learning_rate": 6.549192754006346e-06, "loss": 0.4781, "step": 8133 }, { "epoch": 2.058719311566692, "grad_norm": 0.16695070266723633, "learning_rate": 6.546945604964705e-06, "loss": 0.4417, "step": 8134 }, { "epoch": 2.058972412047583, "grad_norm": 0.17745652794837952, "learning_rate": 6.544698653879351e-06, "loss": 0.4705, "step": 8135 }, { "epoch": 2.059225512528474, "grad_norm": 0.17504172027111053, "learning_rate": 6.542451900879092e-06, "loss": 0.438, "step": 8136 }, { "epoch": 2.0594786130093645, "grad_norm": 0.1712186634540558, "learning_rate": 6.540205346092734e-06, "loss": 0.4591, "step": 8137 }, { "epoch": 2.0597317134902555, "grad_norm": 0.17614728212356567, "learning_rate": 6.537958989649063e-06, "loss": 0.4578, "step": 8138 }, { "epoch": 2.0599848139711465, "grad_norm": 0.17254167795181274, "learning_rate": 6.535712831676858e-06, "loss": 0.4605, "step": 8139 }, { "epoch": 2.0602379144520375, "grad_norm": 0.16991057991981506, "learning_rate": 6.533466872304888e-06, "loss": 0.4547, "step": 8140 }, { "epoch": 2.0604910149329285, "grad_norm": 0.17990268766880035, "learning_rate": 6.531221111661907e-06, "loss": 0.4835, "step": 8141 }, { "epoch": 2.060744115413819, "grad_norm": 0.17584890127182007, "learning_rate": 6.528975549876668e-06, "loss": 0.4538, "step": 8142 }, { "epoch": 2.06099721589471, "grad_norm": 0.17621199786663055, "learning_rate": 6.526730187077892e-06, "loss": 0.4568, "step": 8143 }, { "epoch": 2.061250316375601, "grad_norm": 0.1727132797241211, "learning_rate": 6.524485023394308e-06, "loss": 0.4538, "step": 8144 }, { "epoch": 2.061503416856492, "grad_norm": 0.1710619330406189, "learning_rate": 6.522240058954625e-06, "loss": 0.436, "step": 8145 }, { "epoch": 2.061756517337383, "grad_norm": 0.17339478433132172, "learning_rate": 6.519995293887542e-06, "loss": 0.4692, "step": 8146 }, { "epoch": 2.0620096178182736, "grad_norm": 0.1739133894443512, "learning_rate": 6.517750728321746e-06, "loss": 0.4716, "step": 8147 }, { "epoch": 2.0622627182991646, "grad_norm": 0.17766861617565155, "learning_rate": 6.51550636238592e-06, "loss": 0.4474, "step": 8148 }, { "epoch": 2.0625158187800556, "grad_norm": 0.17404116690158844, "learning_rate": 6.513262196208719e-06, "loss": 0.4639, "step": 8149 }, { "epoch": 2.0627689192609466, "grad_norm": 0.18054121732711792, "learning_rate": 6.511018229918798e-06, "loss": 0.469, "step": 8150 }, { "epoch": 2.0630220197418376, "grad_norm": 0.1834067553281784, "learning_rate": 6.508774463644802e-06, "loss": 0.4739, "step": 8151 }, { "epoch": 2.0632751202227286, "grad_norm": 0.1771842986345291, "learning_rate": 6.5065308975153595e-06, "loss": 0.4584, "step": 8152 }, { "epoch": 2.063528220703619, "grad_norm": 0.17215438187122345, "learning_rate": 6.504287531659094e-06, "loss": 0.472, "step": 8153 }, { "epoch": 2.06378132118451, "grad_norm": 0.17526015639305115, "learning_rate": 6.502044366204606e-06, "loss": 0.4676, "step": 8154 }, { "epoch": 2.064034421665401, "grad_norm": 0.174338236451149, "learning_rate": 6.4998014012804944e-06, "loss": 0.4599, "step": 8155 }, { "epoch": 2.064287522146292, "grad_norm": 0.17471830546855927, "learning_rate": 6.497558637015342e-06, "loss": 0.4501, "step": 8156 }, { "epoch": 2.064540622627183, "grad_norm": 0.17349857091903687, "learning_rate": 6.495316073537725e-06, "loss": 0.4511, "step": 8157 }, { "epoch": 2.0647937231080737, "grad_norm": 0.16786687076091766, "learning_rate": 6.493073710976206e-06, "loss": 0.4712, "step": 8158 }, { "epoch": 2.0650468235889647, "grad_norm": 0.17373056709766388, "learning_rate": 6.490831549459331e-06, "loss": 0.4792, "step": 8159 }, { "epoch": 2.0652999240698557, "grad_norm": 0.17360445857048035, "learning_rate": 6.488589589115638e-06, "loss": 0.4568, "step": 8160 }, { "epoch": 2.0655530245507467, "grad_norm": 0.17856505513191223, "learning_rate": 6.4863478300736556e-06, "loss": 0.4767, "step": 8161 }, { "epoch": 2.0658061250316377, "grad_norm": 0.17401725053787231, "learning_rate": 6.484106272461898e-06, "loss": 0.466, "step": 8162 }, { "epoch": 2.0660592255125283, "grad_norm": 0.17791251838207245, "learning_rate": 6.481864916408869e-06, "loss": 0.4646, "step": 8163 }, { "epoch": 2.0663123259934193, "grad_norm": 0.1742764413356781, "learning_rate": 6.479623762043068e-06, "loss": 0.4547, "step": 8164 }, { "epoch": 2.0665654264743103, "grad_norm": 0.17971232533454895, "learning_rate": 6.477382809492961e-06, "loss": 0.4413, "step": 8165 }, { "epoch": 2.0668185269552013, "grad_norm": 0.16634565591812134, "learning_rate": 6.47514205888703e-06, "loss": 0.4552, "step": 8166 }, { "epoch": 2.0670716274360923, "grad_norm": 0.17294776439666748, "learning_rate": 6.4729015103537244e-06, "loss": 0.4557, "step": 8167 }, { "epoch": 2.067324727916983, "grad_norm": 0.17551589012145996, "learning_rate": 6.470661164021496e-06, "loss": 0.4936, "step": 8168 }, { "epoch": 2.067577828397874, "grad_norm": 0.18341490626335144, "learning_rate": 6.468421020018774e-06, "loss": 0.4626, "step": 8169 }, { "epoch": 2.067830928878765, "grad_norm": 0.17392122745513916, "learning_rate": 6.466181078473988e-06, "loss": 0.4782, "step": 8170 }, { "epoch": 2.068084029359656, "grad_norm": 0.17240209877490997, "learning_rate": 6.4639413395155426e-06, "loss": 0.4471, "step": 8171 }, { "epoch": 2.068337129840547, "grad_norm": 0.17683903872966766, "learning_rate": 6.461701803271836e-06, "loss": 0.4697, "step": 8172 }, { "epoch": 2.0685902303214374, "grad_norm": 0.17867815494537354, "learning_rate": 6.459462469871261e-06, "loss": 0.4869, "step": 8173 }, { "epoch": 2.0688433308023284, "grad_norm": 0.17163343727588654, "learning_rate": 6.457223339442193e-06, "loss": 0.4627, "step": 8174 }, { "epoch": 2.0690964312832194, "grad_norm": 0.17229688167572021, "learning_rate": 6.454984412112999e-06, "loss": 0.457, "step": 8175 }, { "epoch": 2.0693495317641104, "grad_norm": 0.1778525859117508, "learning_rate": 6.452745688012025e-06, "loss": 0.4554, "step": 8176 }, { "epoch": 2.0696026322450014, "grad_norm": 0.17556528747081757, "learning_rate": 6.450507167267619e-06, "loss": 0.4561, "step": 8177 }, { "epoch": 2.069855732725892, "grad_norm": 0.17891168594360352, "learning_rate": 6.448268850008104e-06, "loss": 0.4659, "step": 8178 }, { "epoch": 2.070108833206783, "grad_norm": 0.17735807597637177, "learning_rate": 6.446030736361803e-06, "loss": 0.4647, "step": 8179 }, { "epoch": 2.070361933687674, "grad_norm": 0.18427976965904236, "learning_rate": 6.44379282645702e-06, "loss": 0.4683, "step": 8180 }, { "epoch": 2.070615034168565, "grad_norm": 0.1909165382385254, "learning_rate": 6.441555120422056e-06, "loss": 0.4752, "step": 8181 }, { "epoch": 2.070868134649456, "grad_norm": 0.17612075805664062, "learning_rate": 6.439317618385184e-06, "loss": 0.4711, "step": 8182 }, { "epoch": 2.071121235130347, "grad_norm": 0.17740467190742493, "learning_rate": 6.437080320474681e-06, "loss": 0.4864, "step": 8183 }, { "epoch": 2.0713743356112375, "grad_norm": 0.17776034772396088, "learning_rate": 6.434843226818805e-06, "loss": 0.4653, "step": 8184 }, { "epoch": 2.0716274360921285, "grad_norm": 0.17984658479690552, "learning_rate": 6.432606337545802e-06, "loss": 0.4477, "step": 8185 }, { "epoch": 2.0718805365730195, "grad_norm": 0.175062358379364, "learning_rate": 6.430369652783918e-06, "loss": 0.4375, "step": 8186 }, { "epoch": 2.0721336370539105, "grad_norm": 0.17149995267391205, "learning_rate": 6.428133172661362e-06, "loss": 0.4691, "step": 8187 }, { "epoch": 2.0723867375348015, "grad_norm": 0.17595185339450836, "learning_rate": 6.425896897306356e-06, "loss": 0.4604, "step": 8188 }, { "epoch": 2.072639838015692, "grad_norm": 0.17087793350219727, "learning_rate": 6.4236608268471016e-06, "loss": 0.4715, "step": 8189 }, { "epoch": 2.072892938496583, "grad_norm": 0.17191177606582642, "learning_rate": 6.421424961411784e-06, "loss": 0.4655, "step": 8190 }, { "epoch": 2.073146038977474, "grad_norm": 0.17362059652805328, "learning_rate": 6.419189301128581e-06, "loss": 0.4687, "step": 8191 }, { "epoch": 2.073399139458365, "grad_norm": 0.17347025871276855, "learning_rate": 6.416953846125664e-06, "loss": 0.4715, "step": 8192 }, { "epoch": 2.073652239939256, "grad_norm": 0.17094936966896057, "learning_rate": 6.414718596531181e-06, "loss": 0.4628, "step": 8193 }, { "epoch": 2.0739053404201466, "grad_norm": 0.16857360303401947, "learning_rate": 6.412483552473272e-06, "loss": 0.4458, "step": 8194 }, { "epoch": 2.0741584409010376, "grad_norm": 0.21986667811870575, "learning_rate": 6.410248714080074e-06, "loss": 0.4772, "step": 8195 }, { "epoch": 2.0744115413819286, "grad_norm": 0.17187124490737915, "learning_rate": 6.408014081479701e-06, "loss": 0.463, "step": 8196 }, { "epoch": 2.0746646418628196, "grad_norm": 0.17686498165130615, "learning_rate": 6.405779654800264e-06, "loss": 0.4911, "step": 8197 }, { "epoch": 2.0749177423437106, "grad_norm": 0.17704956233501434, "learning_rate": 6.403545434169853e-06, "loss": 0.4479, "step": 8198 }, { "epoch": 2.075170842824601, "grad_norm": 0.17623141407966614, "learning_rate": 6.4013114197165535e-06, "loss": 0.4848, "step": 8199 }, { "epoch": 2.075423943305492, "grad_norm": 0.1710946410894394, "learning_rate": 6.399077611568435e-06, "loss": 0.4613, "step": 8200 }, { "epoch": 2.075677043786383, "grad_norm": 0.177971750497818, "learning_rate": 6.39684400985356e-06, "loss": 0.4418, "step": 8201 }, { "epoch": 2.075930144267274, "grad_norm": 0.17592956125736237, "learning_rate": 6.394610614699976e-06, "loss": 0.4726, "step": 8202 }, { "epoch": 2.076183244748165, "grad_norm": 0.2801463007926941, "learning_rate": 6.392377426235718e-06, "loss": 0.462, "step": 8203 }, { "epoch": 2.0764363452290557, "grad_norm": 0.17692157626152039, "learning_rate": 6.390144444588808e-06, "loss": 0.4568, "step": 8204 }, { "epoch": 2.0766894457099467, "grad_norm": 0.17724458873271942, "learning_rate": 6.38791166988726e-06, "loss": 0.4642, "step": 8205 }, { "epoch": 2.0769425461908377, "grad_norm": 0.17603760957717896, "learning_rate": 6.385679102259074e-06, "loss": 0.4357, "step": 8206 }, { "epoch": 2.0771956466717287, "grad_norm": 0.17691189050674438, "learning_rate": 6.383446741832235e-06, "loss": 0.4596, "step": 8207 }, { "epoch": 2.0774487471526197, "grad_norm": 0.17309047281742096, "learning_rate": 6.381214588734726e-06, "loss": 0.4437, "step": 8208 }, { "epoch": 2.0777018476335103, "grad_norm": 0.18807101249694824, "learning_rate": 6.378982643094509e-06, "loss": 0.4569, "step": 8209 }, { "epoch": 2.0779549481144013, "grad_norm": 0.179022878408432, "learning_rate": 6.376750905039535e-06, "loss": 0.4645, "step": 8210 }, { "epoch": 2.0782080485952923, "grad_norm": 0.1738903969526291, "learning_rate": 6.374519374697745e-06, "loss": 0.4616, "step": 8211 }, { "epoch": 2.0784611490761833, "grad_norm": 0.1800098419189453, "learning_rate": 6.37228805219707e-06, "loss": 0.4921, "step": 8212 }, { "epoch": 2.0787142495570743, "grad_norm": 0.17797908186912537, "learning_rate": 6.370056937665425e-06, "loss": 0.4401, "step": 8213 }, { "epoch": 2.0789673500379653, "grad_norm": 0.1806929111480713, "learning_rate": 6.367826031230717e-06, "loss": 0.483, "step": 8214 }, { "epoch": 2.079220450518856, "grad_norm": 0.17470496892929077, "learning_rate": 6.3655953330208374e-06, "loss": 0.451, "step": 8215 }, { "epoch": 2.079473550999747, "grad_norm": 0.1646752804517746, "learning_rate": 6.363364843163665e-06, "loss": 0.4344, "step": 8216 }, { "epoch": 2.079726651480638, "grad_norm": 0.20537498593330383, "learning_rate": 6.361134561787074e-06, "loss": 0.4591, "step": 8217 }, { "epoch": 2.079979751961529, "grad_norm": 0.17635972797870636, "learning_rate": 6.35890448901892e-06, "loss": 0.4684, "step": 8218 }, { "epoch": 2.08023285244242, "grad_norm": 0.1752762645483017, "learning_rate": 6.356674624987047e-06, "loss": 0.4524, "step": 8219 }, { "epoch": 2.0804859529233104, "grad_norm": 0.18378829956054688, "learning_rate": 6.354444969819293e-06, "loss": 0.4672, "step": 8220 }, { "epoch": 2.0807390534042014, "grad_norm": 0.18708331882953644, "learning_rate": 6.352215523643473e-06, "loss": 0.4544, "step": 8221 }, { "epoch": 2.0809921538850924, "grad_norm": 0.17736278474330902, "learning_rate": 6.349986286587398e-06, "loss": 0.465, "step": 8222 }, { "epoch": 2.0812452543659834, "grad_norm": 0.17997519671916962, "learning_rate": 6.3477572587788665e-06, "loss": 0.4542, "step": 8223 }, { "epoch": 2.0814983548468744, "grad_norm": 0.1809675544500351, "learning_rate": 6.345528440345665e-06, "loss": 0.4503, "step": 8224 }, { "epoch": 2.081751455327765, "grad_norm": 0.17968106269836426, "learning_rate": 6.3432998314155694e-06, "loss": 0.4614, "step": 8225 }, { "epoch": 2.082004555808656, "grad_norm": 0.17339050769805908, "learning_rate": 6.341071432116335e-06, "loss": 0.4453, "step": 8226 }, { "epoch": 2.082257656289547, "grad_norm": 0.18014830350875854, "learning_rate": 6.338843242575713e-06, "loss": 0.4467, "step": 8227 }, { "epoch": 2.082510756770438, "grad_norm": 0.17791306972503662, "learning_rate": 6.3366152629214415e-06, "loss": 0.4543, "step": 8228 }, { "epoch": 2.082763857251329, "grad_norm": 0.1750633865594864, "learning_rate": 6.334387493281245e-06, "loss": 0.4745, "step": 8229 }, { "epoch": 2.0830169577322195, "grad_norm": 0.18467901647090912, "learning_rate": 6.33215993378284e-06, "loss": 0.461, "step": 8230 }, { "epoch": 2.0832700582131105, "grad_norm": 0.17568978667259216, "learning_rate": 6.329932584553927e-06, "loss": 0.4593, "step": 8231 }, { "epoch": 2.0835231586940015, "grad_norm": 0.2086566984653473, "learning_rate": 6.327705445722192e-06, "loss": 0.4389, "step": 8232 }, { "epoch": 2.0837762591748925, "grad_norm": 0.16826485097408295, "learning_rate": 6.325478517415314e-06, "loss": 0.4254, "step": 8233 }, { "epoch": 2.0840293596557835, "grad_norm": 0.16589459776878357, "learning_rate": 6.3232517997609586e-06, "loss": 0.4531, "step": 8234 }, { "epoch": 2.084282460136674, "grad_norm": 0.17640137672424316, "learning_rate": 6.321025292886777e-06, "loss": 0.4571, "step": 8235 }, { "epoch": 2.084535560617565, "grad_norm": 0.16961075365543365, "learning_rate": 6.3187989969204145e-06, "loss": 0.4494, "step": 8236 }, { "epoch": 2.084788661098456, "grad_norm": 0.17921586334705353, "learning_rate": 6.316572911989494e-06, "loss": 0.4521, "step": 8237 }, { "epoch": 2.085041761579347, "grad_norm": 0.1791069358587265, "learning_rate": 6.314347038221633e-06, "loss": 0.4636, "step": 8238 }, { "epoch": 2.085294862060238, "grad_norm": 0.1810123324394226, "learning_rate": 6.312121375744441e-06, "loss": 0.4719, "step": 8239 }, { "epoch": 2.0855479625411286, "grad_norm": 0.1794518679380417, "learning_rate": 6.3098959246855075e-06, "loss": 0.4735, "step": 8240 }, { "epoch": 2.0858010630220196, "grad_norm": 0.17240066826343536, "learning_rate": 6.307670685172412e-06, "loss": 0.449, "step": 8241 }, { "epoch": 2.0860541635029106, "grad_norm": 0.18018905818462372, "learning_rate": 6.305445657332727e-06, "loss": 0.4545, "step": 8242 }, { "epoch": 2.0863072639838016, "grad_norm": 0.17633172869682312, "learning_rate": 6.303220841294002e-06, "loss": 0.4734, "step": 8243 }, { "epoch": 2.0865603644646926, "grad_norm": 0.17668838798999786, "learning_rate": 6.300996237183786e-06, "loss": 0.4666, "step": 8244 }, { "epoch": 2.0868134649455836, "grad_norm": 0.17154479026794434, "learning_rate": 6.298771845129606e-06, "loss": 0.4559, "step": 8245 }, { "epoch": 2.087066565426474, "grad_norm": 0.17777568101882935, "learning_rate": 6.296547665258987e-06, "loss": 0.4733, "step": 8246 }, { "epoch": 2.087319665907365, "grad_norm": 0.1812816709280014, "learning_rate": 6.294323697699438e-06, "loss": 0.4902, "step": 8247 }, { "epoch": 2.087572766388256, "grad_norm": 0.17460046708583832, "learning_rate": 6.292099942578447e-06, "loss": 0.4615, "step": 8248 }, { "epoch": 2.087825866869147, "grad_norm": 0.17764559388160706, "learning_rate": 6.289876400023502e-06, "loss": 0.4731, "step": 8249 }, { "epoch": 2.088078967350038, "grad_norm": 0.1732087880373001, "learning_rate": 6.287653070162072e-06, "loss": 0.4475, "step": 8250 }, { "epoch": 2.0883320678309287, "grad_norm": 0.17381519079208374, "learning_rate": 6.285429953121616e-06, "loss": 0.4231, "step": 8251 }, { "epoch": 2.0885851683118197, "grad_norm": 0.17365838587284088, "learning_rate": 6.2832070490295825e-06, "loss": 0.4633, "step": 8252 }, { "epoch": 2.0888382687927107, "grad_norm": 0.17507271468639374, "learning_rate": 6.280984358013407e-06, "loss": 0.463, "step": 8253 }, { "epoch": 2.0890913692736017, "grad_norm": 0.17540834844112396, "learning_rate": 6.278761880200506e-06, "loss": 0.4594, "step": 8254 }, { "epoch": 2.0893444697544927, "grad_norm": 0.1764518916606903, "learning_rate": 6.2765396157182935e-06, "loss": 0.4557, "step": 8255 }, { "epoch": 2.0895975702353833, "grad_norm": 0.18006297945976257, "learning_rate": 6.274317564694168e-06, "loss": 0.4499, "step": 8256 }, { "epoch": 2.0898506707162743, "grad_norm": 0.17571650445461273, "learning_rate": 6.272095727255512e-06, "loss": 0.4557, "step": 8257 }, { "epoch": 2.0901037711971653, "grad_norm": 0.17777122557163239, "learning_rate": 6.269874103529702e-06, "loss": 0.4739, "step": 8258 }, { "epoch": 2.0903568716780563, "grad_norm": 0.1973438411951065, "learning_rate": 6.267652693644095e-06, "loss": 0.4669, "step": 8259 }, { "epoch": 2.0906099721589473, "grad_norm": 0.17920438945293427, "learning_rate": 6.26543149772604e-06, "loss": 0.456, "step": 8260 }, { "epoch": 2.090863072639838, "grad_norm": 0.17344117164611816, "learning_rate": 6.2632105159028776e-06, "loss": 0.4377, "step": 8261 }, { "epoch": 2.091116173120729, "grad_norm": 0.16844868659973145, "learning_rate": 6.260989748301929e-06, "loss": 0.4321, "step": 8262 }, { "epoch": 2.09136927360162, "grad_norm": 0.1758665144443512, "learning_rate": 6.258769195050508e-06, "loss": 0.4524, "step": 8263 }, { "epoch": 2.091622374082511, "grad_norm": 0.17467430233955383, "learning_rate": 6.2565488562759146e-06, "loss": 0.4644, "step": 8264 }, { "epoch": 2.091875474563402, "grad_norm": 0.17906713485717773, "learning_rate": 6.254328732105433e-06, "loss": 0.5021, "step": 8265 }, { "epoch": 2.0921285750442924, "grad_norm": 0.1831628680229187, "learning_rate": 6.252108822666339e-06, "loss": 0.4624, "step": 8266 }, { "epoch": 2.0923816755251834, "grad_norm": 0.17079396545886993, "learning_rate": 6.249889128085893e-06, "loss": 0.4364, "step": 8267 }, { "epoch": 2.0926347760060744, "grad_norm": 0.17843008041381836, "learning_rate": 6.247669648491352e-06, "loss": 0.4627, "step": 8268 }, { "epoch": 2.0928878764869654, "grad_norm": 0.17515473067760468, "learning_rate": 6.24545038400995e-06, "loss": 0.4636, "step": 8269 }, { "epoch": 2.0931409769678564, "grad_norm": 0.17306584119796753, "learning_rate": 6.243231334768915e-06, "loss": 0.4619, "step": 8270 }, { "epoch": 2.093394077448747, "grad_norm": 0.1699809432029724, "learning_rate": 6.241012500895456e-06, "loss": 0.4621, "step": 8271 }, { "epoch": 2.093647177929638, "grad_norm": 0.1773194968700409, "learning_rate": 6.238793882516778e-06, "loss": 0.462, "step": 8272 }, { "epoch": 2.093900278410529, "grad_norm": 0.17337815463542938, "learning_rate": 6.236575479760065e-06, "loss": 0.4424, "step": 8273 }, { "epoch": 2.09415337889142, "grad_norm": 0.17358113825321198, "learning_rate": 6.234357292752499e-06, "loss": 0.4606, "step": 8274 }, { "epoch": 2.094406479372311, "grad_norm": 0.17264018952846527, "learning_rate": 6.232139321621246e-06, "loss": 0.434, "step": 8275 }, { "epoch": 2.094659579853202, "grad_norm": 0.1863088756799698, "learning_rate": 6.229921566493447e-06, "loss": 0.4604, "step": 8276 }, { "epoch": 2.0949126803340925, "grad_norm": 0.18187153339385986, "learning_rate": 6.227704027496249e-06, "loss": 0.4605, "step": 8277 }, { "epoch": 2.0951657808149835, "grad_norm": 0.17107565701007843, "learning_rate": 6.2254867047567765e-06, "loss": 0.4477, "step": 8278 }, { "epoch": 2.0954188812958745, "grad_norm": 0.1718006730079651, "learning_rate": 6.223269598402146e-06, "loss": 0.4477, "step": 8279 }, { "epoch": 2.0956719817767655, "grad_norm": 0.2404904067516327, "learning_rate": 6.221052708559454e-06, "loss": 0.4471, "step": 8280 }, { "epoch": 2.0959250822576565, "grad_norm": 0.1790582537651062, "learning_rate": 6.218836035355802e-06, "loss": 0.4555, "step": 8281 }, { "epoch": 2.096178182738547, "grad_norm": 0.17668697237968445, "learning_rate": 6.216619578918253e-06, "loss": 0.469, "step": 8282 }, { "epoch": 2.096431283219438, "grad_norm": 0.18060137331485748, "learning_rate": 6.21440333937388e-06, "loss": 0.4564, "step": 8283 }, { "epoch": 2.096684383700329, "grad_norm": 0.17562885582447052, "learning_rate": 6.212187316849734e-06, "loss": 0.4348, "step": 8284 }, { "epoch": 2.09693748418122, "grad_norm": 0.18609929084777832, "learning_rate": 6.209971511472853e-06, "loss": 0.4746, "step": 8285 }, { "epoch": 2.097190584662111, "grad_norm": 0.18361206352710724, "learning_rate": 6.207755923370269e-06, "loss": 0.4604, "step": 8286 }, { "epoch": 2.0974436851430016, "grad_norm": 0.18336068093776703, "learning_rate": 6.205540552668991e-06, "loss": 0.4534, "step": 8287 }, { "epoch": 2.0976967856238926, "grad_norm": 0.1736464649438858, "learning_rate": 6.203325399496023e-06, "loss": 0.4703, "step": 8288 }, { "epoch": 2.0979498861047836, "grad_norm": 0.1809067577123642, "learning_rate": 6.2011104639783546e-06, "loss": 0.4547, "step": 8289 }, { "epoch": 2.0982029865856746, "grad_norm": 0.17479754984378815, "learning_rate": 6.198895746242968e-06, "loss": 0.4665, "step": 8290 }, { "epoch": 2.0984560870665656, "grad_norm": 0.1744748055934906, "learning_rate": 6.196681246416824e-06, "loss": 0.4916, "step": 8291 }, { "epoch": 2.098709187547456, "grad_norm": 0.17370782792568207, "learning_rate": 6.1944669646268795e-06, "loss": 0.465, "step": 8292 }, { "epoch": 2.098962288028347, "grad_norm": 0.18003319203853607, "learning_rate": 6.19225290100007e-06, "loss": 0.4698, "step": 8293 }, { "epoch": 2.099215388509238, "grad_norm": 0.17642048001289368, "learning_rate": 6.190039055663324e-06, "loss": 0.4658, "step": 8294 }, { "epoch": 2.099468488990129, "grad_norm": 0.17477881908416748, "learning_rate": 6.187825428743555e-06, "loss": 0.4671, "step": 8295 }, { "epoch": 2.09972158947102, "grad_norm": 0.17149104177951813, "learning_rate": 6.185612020367669e-06, "loss": 0.4601, "step": 8296 }, { "epoch": 2.0999746899519107, "grad_norm": 0.17512166500091553, "learning_rate": 6.1833988306625595e-06, "loss": 0.4707, "step": 8297 }, { "epoch": 2.1002277904328017, "grad_norm": 0.17431995272636414, "learning_rate": 6.1811858597550924e-06, "loss": 0.4608, "step": 8298 }, { "epoch": 2.1004808909136927, "grad_norm": 0.17676712572574615, "learning_rate": 6.17897310777214e-06, "loss": 0.4573, "step": 8299 }, { "epoch": 2.1007339913945837, "grad_norm": 0.18188023567199707, "learning_rate": 6.176760574840557e-06, "loss": 0.4417, "step": 8300 }, { "epoch": 2.1009870918754747, "grad_norm": 0.18518802523612976, "learning_rate": 6.174548261087178e-06, "loss": 0.4742, "step": 8301 }, { "epoch": 2.1012401923563653, "grad_norm": 0.17559710144996643, "learning_rate": 6.17233616663883e-06, "loss": 0.4732, "step": 8302 }, { "epoch": 2.1014932928372563, "grad_norm": 0.1779547780752182, "learning_rate": 6.170124291622338e-06, "loss": 0.4814, "step": 8303 }, { "epoch": 2.1017463933181473, "grad_norm": 0.18480615317821503, "learning_rate": 6.16791263616449e-06, "loss": 0.4868, "step": 8304 }, { "epoch": 2.1019994937990383, "grad_norm": 0.17628253996372223, "learning_rate": 6.1657012003920825e-06, "loss": 0.4541, "step": 8305 }, { "epoch": 2.1022525942799293, "grad_norm": 0.16577453911304474, "learning_rate": 6.163489984431893e-06, "loss": 0.4389, "step": 8306 }, { "epoch": 2.10250569476082, "grad_norm": 0.3750302195549011, "learning_rate": 6.161278988410684e-06, "loss": 0.4485, "step": 8307 }, { "epoch": 2.102758795241711, "grad_norm": 0.1762150079011917, "learning_rate": 6.15906821245521e-06, "loss": 0.4535, "step": 8308 }, { "epoch": 2.103011895722602, "grad_norm": 0.17387552559375763, "learning_rate": 6.1568576566922055e-06, "loss": 0.4474, "step": 8309 }, { "epoch": 2.103264996203493, "grad_norm": 0.17970232665538788, "learning_rate": 6.1546473212483995e-06, "loss": 0.45, "step": 8310 }, { "epoch": 2.103518096684384, "grad_norm": 0.1717676967382431, "learning_rate": 6.1524372062505035e-06, "loss": 0.4591, "step": 8311 }, { "epoch": 2.1037711971652744, "grad_norm": 0.17637598514556885, "learning_rate": 6.150227311825223e-06, "loss": 0.4444, "step": 8312 }, { "epoch": 2.1040242976461654, "grad_norm": 0.17185239493846893, "learning_rate": 6.148017638099244e-06, "loss": 0.4385, "step": 8313 }, { "epoch": 2.1042773981270564, "grad_norm": 0.1806570142507553, "learning_rate": 6.145808185199246e-06, "loss": 0.4594, "step": 8314 }, { "epoch": 2.1045304986079474, "grad_norm": 0.17561261355876923, "learning_rate": 6.143598953251885e-06, "loss": 0.4446, "step": 8315 }, { "epoch": 2.1047835990888384, "grad_norm": 0.17679639160633087, "learning_rate": 6.141389942383817e-06, "loss": 0.4721, "step": 8316 }, { "epoch": 2.1050366995697294, "grad_norm": 0.1739262193441391, "learning_rate": 6.139181152721677e-06, "loss": 0.4644, "step": 8317 }, { "epoch": 2.10528980005062, "grad_norm": 0.1766633242368698, "learning_rate": 6.136972584392094e-06, "loss": 0.4836, "step": 8318 }, { "epoch": 2.105542900531511, "grad_norm": 0.16991651058197021, "learning_rate": 6.134764237521681e-06, "loss": 0.4608, "step": 8319 }, { "epoch": 2.105796001012402, "grad_norm": 0.17830967903137207, "learning_rate": 6.132556112237029e-06, "loss": 0.4609, "step": 8320 }, { "epoch": 2.106049101493293, "grad_norm": 0.17704318463802338, "learning_rate": 6.1303482086647345e-06, "loss": 0.4429, "step": 8321 }, { "epoch": 2.106302201974184, "grad_norm": 0.17407138645648956, "learning_rate": 6.128140526931369e-06, "loss": 0.4695, "step": 8322 }, { "epoch": 2.1065553024550745, "grad_norm": 0.17908725142478943, "learning_rate": 6.125933067163492e-06, "loss": 0.4635, "step": 8323 }, { "epoch": 2.1068084029359655, "grad_norm": 0.1867150366306305, "learning_rate": 6.1237258294876546e-06, "loss": 0.4598, "step": 8324 }, { "epoch": 2.1070615034168565, "grad_norm": 0.18397390842437744, "learning_rate": 6.121518814030398e-06, "loss": 0.4645, "step": 8325 }, { "epoch": 2.1073146038977475, "grad_norm": 0.17162275314331055, "learning_rate": 6.119312020918234e-06, "loss": 0.4549, "step": 8326 }, { "epoch": 2.1075677043786385, "grad_norm": 0.177134707570076, "learning_rate": 6.117105450277683e-06, "loss": 0.4462, "step": 8327 }, { "epoch": 2.107820804859529, "grad_norm": 0.17270410060882568, "learning_rate": 6.114899102235239e-06, "loss": 0.4552, "step": 8328 }, { "epoch": 2.10807390534042, "grad_norm": 0.17139074206352234, "learning_rate": 6.112692976917388e-06, "loss": 0.4468, "step": 8329 }, { "epoch": 2.108327005821311, "grad_norm": 0.1756252497434616, "learning_rate": 6.110487074450602e-06, "loss": 0.4624, "step": 8330 }, { "epoch": 2.108580106302202, "grad_norm": 0.1807309240102768, "learning_rate": 6.1082813949613465e-06, "loss": 0.4527, "step": 8331 }, { "epoch": 2.108833206783093, "grad_norm": 0.18030834197998047, "learning_rate": 6.106075938576059e-06, "loss": 0.4698, "step": 8332 }, { "epoch": 2.1090863072639836, "grad_norm": 0.17370426654815674, "learning_rate": 6.103870705421178e-06, "loss": 0.459, "step": 8333 }, { "epoch": 2.1093394077448746, "grad_norm": 0.16999617218971252, "learning_rate": 6.101665695623125e-06, "loss": 0.4783, "step": 8334 }, { "epoch": 2.1095925082257656, "grad_norm": 0.17276009917259216, "learning_rate": 6.099460909308308e-06, "loss": 0.4485, "step": 8335 }, { "epoch": 2.1098456087066566, "grad_norm": 0.1766912043094635, "learning_rate": 6.097256346603126e-06, "loss": 0.4488, "step": 8336 }, { "epoch": 2.1100987091875476, "grad_norm": 0.18118047714233398, "learning_rate": 6.095052007633957e-06, "loss": 0.4613, "step": 8337 }, { "epoch": 2.110351809668438, "grad_norm": 0.17488673329353333, "learning_rate": 6.092847892527171e-06, "loss": 0.463, "step": 8338 }, { "epoch": 2.110604910149329, "grad_norm": 0.2227279543876648, "learning_rate": 6.090644001409128e-06, "loss": 0.4559, "step": 8339 }, { "epoch": 2.11085801063022, "grad_norm": 0.17607957124710083, "learning_rate": 6.0884403344061735e-06, "loss": 0.4675, "step": 8340 }, { "epoch": 2.111111111111111, "grad_norm": 0.1827014833688736, "learning_rate": 6.0862368916446365e-06, "loss": 0.4576, "step": 8341 }, { "epoch": 2.111364211592002, "grad_norm": 0.18268375098705292, "learning_rate": 6.084033673250839e-06, "loss": 0.4575, "step": 8342 }, { "epoch": 2.1116173120728927, "grad_norm": 0.1812678575515747, "learning_rate": 6.0818306793510816e-06, "loss": 0.4168, "step": 8343 }, { "epoch": 2.1118704125537837, "grad_norm": 0.17757651209831238, "learning_rate": 6.079627910071659e-06, "loss": 0.4611, "step": 8344 }, { "epoch": 2.1121235130346747, "grad_norm": 0.17357240617275238, "learning_rate": 6.077425365538854e-06, "loss": 0.4498, "step": 8345 }, { "epoch": 2.1123766135155657, "grad_norm": 0.17559750378131866, "learning_rate": 6.075223045878931e-06, "loss": 0.4311, "step": 8346 }, { "epoch": 2.1126297139964567, "grad_norm": 0.1788448840379715, "learning_rate": 6.0730209512181495e-06, "loss": 0.4704, "step": 8347 }, { "epoch": 2.1128828144773477, "grad_norm": 0.17584098875522614, "learning_rate": 6.07081908168274e-06, "loss": 0.4529, "step": 8348 }, { "epoch": 2.1131359149582383, "grad_norm": 0.19419962167739868, "learning_rate": 6.068617437398942e-06, "loss": 0.4792, "step": 8349 }, { "epoch": 2.1133890154391293, "grad_norm": 0.18910528719425201, "learning_rate": 6.066416018492965e-06, "loss": 0.4566, "step": 8350 }, { "epoch": 2.1136421159200203, "grad_norm": 0.17420798540115356, "learning_rate": 6.064214825091015e-06, "loss": 0.446, "step": 8351 }, { "epoch": 2.1138952164009113, "grad_norm": 0.18611347675323486, "learning_rate": 6.0620138573192775e-06, "loss": 0.44, "step": 8352 }, { "epoch": 2.1141483168818023, "grad_norm": 0.18352103233337402, "learning_rate": 6.0598131153039385e-06, "loss": 0.4597, "step": 8353 }, { "epoch": 2.114401417362693, "grad_norm": 0.17401187121868134, "learning_rate": 6.057612599171152e-06, "loss": 0.4538, "step": 8354 }, { "epoch": 2.114654517843584, "grad_norm": 0.18590238690376282, "learning_rate": 6.05541230904707e-06, "loss": 0.4468, "step": 8355 }, { "epoch": 2.114907618324475, "grad_norm": 0.16827228665351868, "learning_rate": 6.053212245057833e-06, "loss": 0.4408, "step": 8356 }, { "epoch": 2.115160718805366, "grad_norm": 0.17165833711624146, "learning_rate": 6.051012407329569e-06, "loss": 0.4515, "step": 8357 }, { "epoch": 2.115413819286257, "grad_norm": 0.17197468876838684, "learning_rate": 6.048812795988388e-06, "loss": 0.4558, "step": 8358 }, { "epoch": 2.1156669197671474, "grad_norm": 0.18327516317367554, "learning_rate": 6.046613411160384e-06, "loss": 0.4437, "step": 8359 }, { "epoch": 2.1159200202480384, "grad_norm": 0.17828911542892456, "learning_rate": 6.044414252971649e-06, "loss": 0.4468, "step": 8360 }, { "epoch": 2.1161731207289294, "grad_norm": 0.18909882009029388, "learning_rate": 6.042215321548253e-06, "loss": 0.4246, "step": 8361 }, { "epoch": 2.1164262212098204, "grad_norm": 0.18812048435211182, "learning_rate": 6.040016617016257e-06, "loss": 0.4645, "step": 8362 }, { "epoch": 2.1166793216907114, "grad_norm": 0.17187979817390442, "learning_rate": 6.03781813950171e-06, "loss": 0.4387, "step": 8363 }, { "epoch": 2.116932422171602, "grad_norm": 0.17714500427246094, "learning_rate": 6.035619889130644e-06, "loss": 0.4566, "step": 8364 }, { "epoch": 2.117185522652493, "grad_norm": 0.18227015435695648, "learning_rate": 6.033421866029081e-06, "loss": 0.4498, "step": 8365 }, { "epoch": 2.117438623133384, "grad_norm": 0.17653875052928925, "learning_rate": 6.031224070323026e-06, "loss": 0.4434, "step": 8366 }, { "epoch": 2.117691723614275, "grad_norm": 0.17439821362495422, "learning_rate": 6.029026502138477e-06, "loss": 0.4448, "step": 8367 }, { "epoch": 2.117944824095166, "grad_norm": 0.17788255214691162, "learning_rate": 6.026829161601415e-06, "loss": 0.452, "step": 8368 }, { "epoch": 2.1181979245760565, "grad_norm": 0.17545610666275024, "learning_rate": 6.024632048837813e-06, "loss": 0.4391, "step": 8369 }, { "epoch": 2.1184510250569475, "grad_norm": 0.17401672899723053, "learning_rate": 6.0224351639736185e-06, "loss": 0.4838, "step": 8370 }, { "epoch": 2.1187041255378385, "grad_norm": 0.18967293202877045, "learning_rate": 6.0202385071347795e-06, "loss": 0.4816, "step": 8371 }, { "epoch": 2.1189572260187295, "grad_norm": 0.179212287068367, "learning_rate": 6.018042078447226e-06, "loss": 0.4714, "step": 8372 }, { "epoch": 2.1192103264996205, "grad_norm": 0.1726207435131073, "learning_rate": 6.015845878036872e-06, "loss": 0.4381, "step": 8373 }, { "epoch": 2.119463426980511, "grad_norm": 0.1740562468767166, "learning_rate": 6.0136499060296215e-06, "loss": 0.4458, "step": 8374 }, { "epoch": 2.119716527461402, "grad_norm": 0.1730964332818985, "learning_rate": 6.011454162551371e-06, "loss": 0.4387, "step": 8375 }, { "epoch": 2.119969627942293, "grad_norm": 0.19486108422279358, "learning_rate": 6.009258647727989e-06, "loss": 0.4555, "step": 8376 }, { "epoch": 2.120222728423184, "grad_norm": 0.17872291803359985, "learning_rate": 6.007063361685341e-06, "loss": 0.4666, "step": 8377 }, { "epoch": 2.120475828904075, "grad_norm": 0.18093456327915192, "learning_rate": 6.004868304549284e-06, "loss": 0.4665, "step": 8378 }, { "epoch": 2.120728929384966, "grad_norm": 0.17959211766719818, "learning_rate": 6.00267347644565e-06, "loss": 0.4573, "step": 8379 }, { "epoch": 2.1209820298658566, "grad_norm": 0.1845996379852295, "learning_rate": 6.000478877500271e-06, "loss": 0.4375, "step": 8380 }, { "epoch": 2.1212351303467476, "grad_norm": 0.17615650594234467, "learning_rate": 5.998284507838951e-06, "loss": 0.4449, "step": 8381 }, { "epoch": 2.1214882308276386, "grad_norm": 0.17145149409770966, "learning_rate": 5.996090367587491e-06, "loss": 0.4417, "step": 8382 }, { "epoch": 2.1217413313085296, "grad_norm": 0.17877204716205597, "learning_rate": 5.993896456871675e-06, "loss": 0.4403, "step": 8383 }, { "epoch": 2.1219944317894206, "grad_norm": 0.18001484870910645, "learning_rate": 5.991702775817279e-06, "loss": 0.498, "step": 8384 }, { "epoch": 2.122247532270311, "grad_norm": 0.17393605411052704, "learning_rate": 5.989509324550063e-06, "loss": 0.4578, "step": 8385 }, { "epoch": 2.122500632751202, "grad_norm": 0.1790877878665924, "learning_rate": 5.987316103195769e-06, "loss": 0.4446, "step": 8386 }, { "epoch": 2.122753733232093, "grad_norm": 0.1790814995765686, "learning_rate": 5.985123111880129e-06, "loss": 0.4448, "step": 8387 }, { "epoch": 2.123006833712984, "grad_norm": 0.17476153373718262, "learning_rate": 5.982930350728866e-06, "loss": 0.4619, "step": 8388 }, { "epoch": 2.123259934193875, "grad_norm": 0.17833936214447021, "learning_rate": 5.980737819867684e-06, "loss": 0.4605, "step": 8389 }, { "epoch": 2.1235130346747657, "grad_norm": 0.17466962337493896, "learning_rate": 5.978545519422276e-06, "loss": 0.471, "step": 8390 }, { "epoch": 2.1237661351556567, "grad_norm": 0.17909640073776245, "learning_rate": 5.976353449518324e-06, "loss": 0.4627, "step": 8391 }, { "epoch": 2.1240192356365477, "grad_norm": 0.17440451681613922, "learning_rate": 5.974161610281495e-06, "loss": 0.4644, "step": 8392 }, { "epoch": 2.1242723361174387, "grad_norm": 0.17724819481372833, "learning_rate": 5.971970001837439e-06, "loss": 0.4572, "step": 8393 }, { "epoch": 2.1245254365983297, "grad_norm": 0.1712203472852707, "learning_rate": 5.969778624311799e-06, "loss": 0.4704, "step": 8394 }, { "epoch": 2.1247785370792203, "grad_norm": 0.1896282136440277, "learning_rate": 5.967587477830202e-06, "loss": 0.4464, "step": 8395 }, { "epoch": 2.1250316375601113, "grad_norm": 0.1780538558959961, "learning_rate": 5.965396562518261e-06, "loss": 0.4607, "step": 8396 }, { "epoch": 2.1252847380410023, "grad_norm": 0.2282477170228958, "learning_rate": 5.963205878501579e-06, "loss": 0.4654, "step": 8397 }, { "epoch": 2.1255378385218933, "grad_norm": 0.18205499649047852, "learning_rate": 5.961015425905739e-06, "loss": 0.4606, "step": 8398 }, { "epoch": 2.1257909390027843, "grad_norm": 0.17695969343185425, "learning_rate": 5.958825204856313e-06, "loss": 0.458, "step": 8399 }, { "epoch": 2.126044039483675, "grad_norm": 0.17260660231113434, "learning_rate": 5.956635215478869e-06, "loss": 0.4394, "step": 8400 }, { "epoch": 2.126297139964566, "grad_norm": 0.18141956627368927, "learning_rate": 5.954445457898951e-06, "loss": 0.4436, "step": 8401 }, { "epoch": 2.126550240445457, "grad_norm": 0.1969226896762848, "learning_rate": 5.9522559322420934e-06, "loss": 0.4606, "step": 8402 }, { "epoch": 2.126803340926348, "grad_norm": 0.18027213215827942, "learning_rate": 5.950066638633819e-06, "loss": 0.4533, "step": 8403 }, { "epoch": 2.127056441407239, "grad_norm": 0.1749209612607956, "learning_rate": 5.947877577199631e-06, "loss": 0.4415, "step": 8404 }, { "epoch": 2.1273095418881294, "grad_norm": 0.17314164340496063, "learning_rate": 5.945688748065026e-06, "loss": 0.4453, "step": 8405 }, { "epoch": 2.1275626423690204, "grad_norm": 0.17398366332054138, "learning_rate": 5.943500151355484e-06, "loss": 0.4617, "step": 8406 }, { "epoch": 2.1278157428499114, "grad_norm": 0.1779884546995163, "learning_rate": 5.941311787196474e-06, "loss": 0.4316, "step": 8407 }, { "epoch": 2.1280688433308024, "grad_norm": 0.18585239350795746, "learning_rate": 5.939123655713452e-06, "loss": 0.4625, "step": 8408 }, { "epoch": 2.1283219438116934, "grad_norm": 0.17755305767059326, "learning_rate": 5.936935757031857e-06, "loss": 0.4481, "step": 8409 }, { "epoch": 2.1285750442925844, "grad_norm": 0.17616887390613556, "learning_rate": 5.934748091277114e-06, "loss": 0.4823, "step": 8410 }, { "epoch": 2.128828144773475, "grad_norm": 0.1783132702112198, "learning_rate": 5.93256065857464e-06, "loss": 0.4604, "step": 8411 }, { "epoch": 2.129081245254366, "grad_norm": 0.1742076724767685, "learning_rate": 5.930373459049832e-06, "loss": 0.446, "step": 8412 }, { "epoch": 2.129334345735257, "grad_norm": 0.1748105138540268, "learning_rate": 5.928186492828086e-06, "loss": 0.4485, "step": 8413 }, { "epoch": 2.129587446216148, "grad_norm": 0.1777363419532776, "learning_rate": 5.92599976003477e-06, "loss": 0.4481, "step": 8414 }, { "epoch": 2.129840546697039, "grad_norm": 0.18269895017147064, "learning_rate": 5.9238132607952455e-06, "loss": 0.4749, "step": 8415 }, { "epoch": 2.1300936471779295, "grad_norm": 0.17527224123477936, "learning_rate": 5.921626995234859e-06, "loss": 0.4546, "step": 8416 }, { "epoch": 2.1303467476588205, "grad_norm": 0.1763608455657959, "learning_rate": 5.919440963478947e-06, "loss": 0.4453, "step": 8417 }, { "epoch": 2.1305998481397115, "grad_norm": 0.17034251987934113, "learning_rate": 5.917255165652829e-06, "loss": 0.4609, "step": 8418 }, { "epoch": 2.1308529486206025, "grad_norm": 0.18489407002925873, "learning_rate": 5.9150696018818135e-06, "loss": 0.4405, "step": 8419 }, { "epoch": 2.1311060491014935, "grad_norm": 0.17640143632888794, "learning_rate": 5.912884272291188e-06, "loss": 0.4452, "step": 8420 }, { "epoch": 2.131359149582384, "grad_norm": 0.17615000903606415, "learning_rate": 5.910699177006238e-06, "loss": 0.4507, "step": 8421 }, { "epoch": 2.131612250063275, "grad_norm": 0.1867872178554535, "learning_rate": 5.908514316152231e-06, "loss": 0.4465, "step": 8422 }, { "epoch": 2.131865350544166, "grad_norm": 0.17768846452236176, "learning_rate": 5.90632968985442e-06, "loss": 0.4653, "step": 8423 }, { "epoch": 2.132118451025057, "grad_norm": 0.1766105592250824, "learning_rate": 5.904145298238044e-06, "loss": 0.4519, "step": 8424 }, { "epoch": 2.132371551505948, "grad_norm": 0.17322121560573578, "learning_rate": 5.901961141428332e-06, "loss": 0.4316, "step": 8425 }, { "epoch": 2.1326246519868386, "grad_norm": 0.17248615622520447, "learning_rate": 5.899777219550492e-06, "loss": 0.4495, "step": 8426 }, { "epoch": 2.1328777524677296, "grad_norm": 0.17530350387096405, "learning_rate": 5.897593532729726e-06, "loss": 0.4818, "step": 8427 }, { "epoch": 2.1331308529486206, "grad_norm": 0.17081396281719208, "learning_rate": 5.8954100810912195e-06, "loss": 0.4482, "step": 8428 }, { "epoch": 2.1333839534295116, "grad_norm": 0.1745065152645111, "learning_rate": 5.893226864760148e-06, "loss": 0.4551, "step": 8429 }, { "epoch": 2.1336370539104026, "grad_norm": 0.17424257099628448, "learning_rate": 5.8910438838616714e-06, "loss": 0.4561, "step": 8430 }, { "epoch": 2.133890154391293, "grad_norm": 0.18562093377113342, "learning_rate": 5.88886113852093e-06, "loss": 0.4487, "step": 8431 }, { "epoch": 2.134143254872184, "grad_norm": 0.1787247359752655, "learning_rate": 5.88667862886306e-06, "loss": 0.4586, "step": 8432 }, { "epoch": 2.134396355353075, "grad_norm": 0.18047836422920227, "learning_rate": 5.884496355013179e-06, "loss": 0.4868, "step": 8433 }, { "epoch": 2.134649455833966, "grad_norm": 0.18147951364517212, "learning_rate": 5.882314317096388e-06, "loss": 0.4464, "step": 8434 }, { "epoch": 2.134902556314857, "grad_norm": 0.17091809213161469, "learning_rate": 5.880132515237785e-06, "loss": 0.4319, "step": 8435 }, { "epoch": 2.1351556567957477, "grad_norm": 0.17510230839252472, "learning_rate": 5.877950949562451e-06, "loss": 0.454, "step": 8436 }, { "epoch": 2.1354087572766387, "grad_norm": 0.17980137467384338, "learning_rate": 5.8757696201954385e-06, "loss": 0.4413, "step": 8437 }, { "epoch": 2.1356618577575297, "grad_norm": 0.32997334003448486, "learning_rate": 5.873588527261806e-06, "loss": 0.4428, "step": 8438 }, { "epoch": 2.1359149582384207, "grad_norm": 0.20324115455150604, "learning_rate": 5.871407670886592e-06, "loss": 0.434, "step": 8439 }, { "epoch": 2.1361680587193117, "grad_norm": 0.1735571324825287, "learning_rate": 5.869227051194818e-06, "loss": 0.4939, "step": 8440 }, { "epoch": 2.1364211592002027, "grad_norm": 0.1779181808233261, "learning_rate": 5.867046668311496e-06, "loss": 0.4596, "step": 8441 }, { "epoch": 2.1366742596810933, "grad_norm": 0.17518119513988495, "learning_rate": 5.864866522361621e-06, "loss": 0.4384, "step": 8442 }, { "epoch": 2.1369273601619843, "grad_norm": 0.17216022312641144, "learning_rate": 5.862686613470172e-06, "loss": 0.4539, "step": 8443 }, { "epoch": 2.1371804606428753, "grad_norm": 0.18594950437545776, "learning_rate": 5.860506941762128e-06, "loss": 0.4638, "step": 8444 }, { "epoch": 2.1374335611237663, "grad_norm": 0.1915060579776764, "learning_rate": 5.858327507362438e-06, "loss": 0.4633, "step": 8445 }, { "epoch": 2.1376866616046573, "grad_norm": 0.17728041112422943, "learning_rate": 5.856148310396048e-06, "loss": 0.4722, "step": 8446 }, { "epoch": 2.137939762085548, "grad_norm": 0.18072256445884705, "learning_rate": 5.853969350987887e-06, "loss": 0.4631, "step": 8447 }, { "epoch": 2.138192862566439, "grad_norm": 0.1729324758052826, "learning_rate": 5.851790629262866e-06, "loss": 0.4429, "step": 8448 }, { "epoch": 2.13844596304733, "grad_norm": 0.20112207531929016, "learning_rate": 5.849612145345889e-06, "loss": 0.4645, "step": 8449 }, { "epoch": 2.138699063528221, "grad_norm": 0.17374162375926971, "learning_rate": 5.847433899361842e-06, "loss": 0.4739, "step": 8450 }, { "epoch": 2.138952164009112, "grad_norm": 0.173701673746109, "learning_rate": 5.845255891435603e-06, "loss": 0.4843, "step": 8451 }, { "epoch": 2.1392052644900024, "grad_norm": 0.1719547063112259, "learning_rate": 5.843078121692032e-06, "loss": 0.4708, "step": 8452 }, { "epoch": 2.1394583649708934, "grad_norm": 0.17382852733135223, "learning_rate": 5.840900590255973e-06, "loss": 0.4606, "step": 8453 }, { "epoch": 2.1397114654517844, "grad_norm": 0.16966192424297333, "learning_rate": 5.83872329725226e-06, "loss": 0.4656, "step": 8454 }, { "epoch": 2.1399645659326754, "grad_norm": 0.1712973415851593, "learning_rate": 5.836546242805712e-06, "loss": 0.4515, "step": 8455 }, { "epoch": 2.1402176664135664, "grad_norm": 0.1752311885356903, "learning_rate": 5.834369427041138e-06, "loss": 0.4232, "step": 8456 }, { "epoch": 2.140470766894457, "grad_norm": 0.17439012229442596, "learning_rate": 5.832192850083327e-06, "loss": 0.4458, "step": 8457 }, { "epoch": 2.140723867375348, "grad_norm": 0.17316745221614838, "learning_rate": 5.830016512057064e-06, "loss": 0.4454, "step": 8458 }, { "epoch": 2.140976967856239, "grad_norm": 0.18130949139595032, "learning_rate": 5.827840413087106e-06, "loss": 0.4357, "step": 8459 }, { "epoch": 2.14123006833713, "grad_norm": 0.18465834856033325, "learning_rate": 5.825664553298206e-06, "loss": 0.4301, "step": 8460 }, { "epoch": 2.141483168818021, "grad_norm": 0.172604501247406, "learning_rate": 5.823488932815098e-06, "loss": 0.4391, "step": 8461 }, { "epoch": 2.1417362692989115, "grad_norm": 0.1780618280172348, "learning_rate": 5.821313551762515e-06, "loss": 0.4625, "step": 8462 }, { "epoch": 2.1419893697798025, "grad_norm": 0.17744643986225128, "learning_rate": 5.819138410265161e-06, "loss": 0.4589, "step": 8463 }, { "epoch": 2.1422424702606935, "grad_norm": 0.172369584441185, "learning_rate": 5.816963508447737e-06, "loss": 0.4536, "step": 8464 }, { "epoch": 2.1424955707415845, "grad_norm": 0.17506705224514008, "learning_rate": 5.81478884643492e-06, "loss": 0.4613, "step": 8465 }, { "epoch": 2.1427486712224755, "grad_norm": 0.1734200119972229, "learning_rate": 5.81261442435138e-06, "loss": 0.4526, "step": 8466 }, { "epoch": 2.143001771703366, "grad_norm": 0.1780119240283966, "learning_rate": 5.810440242321766e-06, "loss": 0.4257, "step": 8467 }, { "epoch": 2.143254872184257, "grad_norm": 0.1695406436920166, "learning_rate": 5.808266300470733e-06, "loss": 0.4251, "step": 8468 }, { "epoch": 2.143507972665148, "grad_norm": 0.17542986571788788, "learning_rate": 5.806092598922903e-06, "loss": 0.4687, "step": 8469 }, { "epoch": 2.143761073146039, "grad_norm": 0.18041695654392242, "learning_rate": 5.803919137802885e-06, "loss": 0.4328, "step": 8470 }, { "epoch": 2.14401417362693, "grad_norm": 0.1701427847146988, "learning_rate": 5.8017459172352815e-06, "loss": 0.4344, "step": 8471 }, { "epoch": 2.144267274107821, "grad_norm": 0.18110209703445435, "learning_rate": 5.79957293734468e-06, "loss": 0.4585, "step": 8472 }, { "epoch": 2.1445203745887116, "grad_norm": 0.1804247498512268, "learning_rate": 5.797400198255651e-06, "loss": 0.4618, "step": 8473 }, { "epoch": 2.1447734750696026, "grad_norm": 0.1800869256258011, "learning_rate": 5.795227700092751e-06, "loss": 0.4473, "step": 8474 }, { "epoch": 2.1450265755504936, "grad_norm": 0.18991832435131073, "learning_rate": 5.793055442980535e-06, "loss": 0.4332, "step": 8475 }, { "epoch": 2.1452796760313846, "grad_norm": 0.1797182559967041, "learning_rate": 5.790883427043522e-06, "loss": 0.4646, "step": 8476 }, { "epoch": 2.145532776512275, "grad_norm": 0.17562130093574524, "learning_rate": 5.788711652406233e-06, "loss": 0.4731, "step": 8477 }, { "epoch": 2.145785876993166, "grad_norm": 0.17591562867164612, "learning_rate": 5.7865401191931734e-06, "loss": 0.4446, "step": 8478 }, { "epoch": 2.146038977474057, "grad_norm": 0.18220049142837524, "learning_rate": 5.78436882752883e-06, "loss": 0.458, "step": 8479 }, { "epoch": 2.146292077954948, "grad_norm": 0.17575563490390778, "learning_rate": 5.782197777537681e-06, "loss": 0.475, "step": 8480 }, { "epoch": 2.146545178435839, "grad_norm": 0.17621667683124542, "learning_rate": 5.780026969344184e-06, "loss": 0.4622, "step": 8481 }, { "epoch": 2.14679827891673, "grad_norm": 0.1768469661474228, "learning_rate": 5.77785640307279e-06, "loss": 0.4724, "step": 8482 }, { "epoch": 2.1470513793976207, "grad_norm": 0.1792469024658203, "learning_rate": 5.775686078847933e-06, "loss": 0.47, "step": 8483 }, { "epoch": 2.1473044798785117, "grad_norm": 0.1894334852695465, "learning_rate": 5.773515996794032e-06, "loss": 0.4693, "step": 8484 }, { "epoch": 2.1475575803594027, "grad_norm": 0.1732577085494995, "learning_rate": 5.771346157035494e-06, "loss": 0.4373, "step": 8485 }, { "epoch": 2.1478106808402937, "grad_norm": 0.17886310815811157, "learning_rate": 5.76917655969671e-06, "loss": 0.4492, "step": 8486 }, { "epoch": 2.1480637813211847, "grad_norm": 0.1712307631969452, "learning_rate": 5.767007204902059e-06, "loss": 0.4392, "step": 8487 }, { "epoch": 2.1483168818020753, "grad_norm": 0.17671427130699158, "learning_rate": 5.764838092775906e-06, "loss": 0.4793, "step": 8488 }, { "epoch": 2.1485699822829663, "grad_norm": 0.1771697998046875, "learning_rate": 5.762669223442601e-06, "loss": 0.4598, "step": 8489 }, { "epoch": 2.1488230827638573, "grad_norm": 0.1768876165151596, "learning_rate": 5.760500597026481e-06, "loss": 0.4325, "step": 8490 }, { "epoch": 2.1490761832447483, "grad_norm": 0.17507484555244446, "learning_rate": 5.7583322136518734e-06, "loss": 0.4446, "step": 8491 }, { "epoch": 2.1493292837256393, "grad_norm": 0.18236151337623596, "learning_rate": 5.756164073443075e-06, "loss": 0.4509, "step": 8492 }, { "epoch": 2.14958238420653, "grad_norm": 0.1735764890909195, "learning_rate": 5.753996176524391e-06, "loss": 0.4475, "step": 8493 }, { "epoch": 2.149835484687421, "grad_norm": 0.17651492357254028, "learning_rate": 5.7518285230201e-06, "loss": 0.4446, "step": 8494 }, { "epoch": 2.150088585168312, "grad_norm": 0.1776290088891983, "learning_rate": 5.749661113054468e-06, "loss": 0.4363, "step": 8495 }, { "epoch": 2.150341685649203, "grad_norm": 0.1766502559185028, "learning_rate": 5.747493946751748e-06, "loss": 0.4599, "step": 8496 }, { "epoch": 2.150594786130094, "grad_norm": 0.17506460845470428, "learning_rate": 5.745327024236185e-06, "loss": 0.4736, "step": 8497 }, { "epoch": 2.1508478866109844, "grad_norm": 0.17554031312465668, "learning_rate": 5.743160345631988e-06, "loss": 0.4341, "step": 8498 }, { "epoch": 2.1511009870918754, "grad_norm": 0.18591471016407013, "learning_rate": 5.740993911063384e-06, "loss": 0.4499, "step": 8499 }, { "epoch": 2.1513540875727664, "grad_norm": 0.1711563915014267, "learning_rate": 5.738827720654565e-06, "loss": 0.4538, "step": 8500 }, { "epoch": 2.1516071880536574, "grad_norm": 0.182359978556633, "learning_rate": 5.736661774529713e-06, "loss": 0.4756, "step": 8501 }, { "epoch": 2.1518602885345484, "grad_norm": 0.1738852560520172, "learning_rate": 5.734496072813003e-06, "loss": 0.4813, "step": 8502 }, { "epoch": 2.1521133890154394, "grad_norm": 0.1736578643321991, "learning_rate": 5.7323306156285805e-06, "loss": 0.4647, "step": 8503 }, { "epoch": 2.15236648949633, "grad_norm": 0.19523918628692627, "learning_rate": 5.730165403100591e-06, "loss": 0.4673, "step": 8504 }, { "epoch": 2.152619589977221, "grad_norm": 0.17446036636829376, "learning_rate": 5.728000435353157e-06, "loss": 0.4462, "step": 8505 }, { "epoch": 2.152872690458112, "grad_norm": 0.17991261184215546, "learning_rate": 5.7258357125104e-06, "loss": 0.4515, "step": 8506 }, { "epoch": 2.153125790939003, "grad_norm": 0.17739011347293854, "learning_rate": 5.723671234696415e-06, "loss": 0.4607, "step": 8507 }, { "epoch": 2.1533788914198935, "grad_norm": 0.17914780974388123, "learning_rate": 5.7215070020352924e-06, "loss": 0.4553, "step": 8508 }, { "epoch": 2.1536319919007845, "grad_norm": 0.17444606125354767, "learning_rate": 5.719343014651093e-06, "loss": 0.471, "step": 8509 }, { "epoch": 2.1538850923816755, "grad_norm": 0.17933472990989685, "learning_rate": 5.717179272667879e-06, "loss": 0.4542, "step": 8510 }, { "epoch": 2.1541381928625665, "grad_norm": 0.17539632320404053, "learning_rate": 5.715015776209687e-06, "loss": 0.4519, "step": 8511 }, { "epoch": 2.1543912933434575, "grad_norm": 0.1766473352909088, "learning_rate": 5.7128525254005555e-06, "loss": 0.4666, "step": 8512 }, { "epoch": 2.1546443938243485, "grad_norm": 0.17588506639003754, "learning_rate": 5.710689520364499e-06, "loss": 0.4714, "step": 8513 }, { "epoch": 2.154897494305239, "grad_norm": 0.1749575138092041, "learning_rate": 5.708526761225508e-06, "loss": 0.4456, "step": 8514 }, { "epoch": 2.15515059478613, "grad_norm": 0.17650194466114044, "learning_rate": 5.706364248107577e-06, "loss": 0.4579, "step": 8515 }, { "epoch": 2.155403695267021, "grad_norm": 0.1809140145778656, "learning_rate": 5.704201981134674e-06, "loss": 0.4747, "step": 8516 }, { "epoch": 2.155656795747912, "grad_norm": 0.1802748292684555, "learning_rate": 5.7020399604307585e-06, "loss": 0.4711, "step": 8517 }, { "epoch": 2.155909896228803, "grad_norm": 0.18029479682445526, "learning_rate": 5.699878186119773e-06, "loss": 0.4728, "step": 8518 }, { "epoch": 2.1561629967096936, "grad_norm": 0.17701804637908936, "learning_rate": 5.697716658325656e-06, "loss": 0.4693, "step": 8519 }, { "epoch": 2.1564160971905846, "grad_norm": 0.1791103333234787, "learning_rate": 5.695555377172315e-06, "loss": 0.4565, "step": 8520 }, { "epoch": 2.1566691976714756, "grad_norm": 0.17522144317626953, "learning_rate": 5.693394342783652e-06, "loss": 0.4336, "step": 8521 }, { "epoch": 2.1569222981523666, "grad_norm": 0.17520640790462494, "learning_rate": 5.691233555283555e-06, "loss": 0.4547, "step": 8522 }, { "epoch": 2.1571753986332576, "grad_norm": 0.17458556592464447, "learning_rate": 5.6890730147958984e-06, "loss": 0.455, "step": 8523 }, { "epoch": 2.157428499114148, "grad_norm": 0.1768123060464859, "learning_rate": 5.6869127214445386e-06, "loss": 0.4385, "step": 8524 }, { "epoch": 2.157681599595039, "grad_norm": 0.1778554469347, "learning_rate": 5.684752675353331e-06, "loss": 0.4404, "step": 8525 }, { "epoch": 2.15793470007593, "grad_norm": 0.17595012485980988, "learning_rate": 5.682592876646094e-06, "loss": 0.4499, "step": 8526 }, { "epoch": 2.158187800556821, "grad_norm": 0.17867085337638855, "learning_rate": 5.68043332544665e-06, "loss": 0.4569, "step": 8527 }, { "epoch": 2.158440901037712, "grad_norm": 0.17729580402374268, "learning_rate": 5.6782740218788e-06, "loss": 0.4396, "step": 8528 }, { "epoch": 2.1586940015186027, "grad_norm": 0.1806289255619049, "learning_rate": 5.676114966066335e-06, "loss": 0.4425, "step": 8529 }, { "epoch": 2.1589471019994937, "grad_norm": 0.1818612962961197, "learning_rate": 5.673956158133025e-06, "loss": 0.4378, "step": 8530 }, { "epoch": 2.1592002024803847, "grad_norm": 0.17919990420341492, "learning_rate": 5.671797598202632e-06, "loss": 0.4647, "step": 8531 }, { "epoch": 2.1594533029612757, "grad_norm": 0.17390500009059906, "learning_rate": 5.669639286398901e-06, "loss": 0.4249, "step": 8532 }, { "epoch": 2.1597064034421667, "grad_norm": 0.17862056195735931, "learning_rate": 5.667481222845565e-06, "loss": 0.4466, "step": 8533 }, { "epoch": 2.1599595039230577, "grad_norm": 0.1786917746067047, "learning_rate": 5.66532340766634e-06, "loss": 0.4382, "step": 8534 }, { "epoch": 2.1602126044039482, "grad_norm": 0.18502533435821533, "learning_rate": 5.663165840984929e-06, "loss": 0.4396, "step": 8535 }, { "epoch": 2.1604657048848392, "grad_norm": 0.18148665130138397, "learning_rate": 5.66100852292502e-06, "loss": 0.4494, "step": 8536 }, { "epoch": 2.1607188053657302, "grad_norm": 0.17758800089359283, "learning_rate": 5.65885145361029e-06, "loss": 0.4288, "step": 8537 }, { "epoch": 2.1609719058466212, "grad_norm": 0.18082717061042786, "learning_rate": 5.656694633164397e-06, "loss": 0.4535, "step": 8538 }, { "epoch": 2.161225006327512, "grad_norm": 0.18144254386425018, "learning_rate": 5.6545380617109865e-06, "loss": 0.4577, "step": 8539 }, { "epoch": 2.161478106808403, "grad_norm": 0.1814727783203125, "learning_rate": 5.652381739373692e-06, "loss": 0.4981, "step": 8540 }, { "epoch": 2.161731207289294, "grad_norm": 0.17400047183036804, "learning_rate": 5.650225666276135e-06, "loss": 0.4569, "step": 8541 }, { "epoch": 2.161984307770185, "grad_norm": 0.18111345171928406, "learning_rate": 5.6480698425419036e-06, "loss": 0.4594, "step": 8542 }, { "epoch": 2.162237408251076, "grad_norm": 0.1868801712989807, "learning_rate": 5.645914268294601e-06, "loss": 0.458, "step": 8543 }, { "epoch": 2.162490508731967, "grad_norm": 0.17629927396774292, "learning_rate": 5.643758943657798e-06, "loss": 0.445, "step": 8544 }, { "epoch": 2.1627436092128574, "grad_norm": 0.1800999939441681, "learning_rate": 5.641603868755055e-06, "loss": 0.4691, "step": 8545 }, { "epoch": 2.1629967096937484, "grad_norm": 0.17953522503376007, "learning_rate": 5.639449043709915e-06, "loss": 0.4622, "step": 8546 }, { "epoch": 2.1632498101746394, "grad_norm": 0.17055989801883698, "learning_rate": 5.637294468645915e-06, "loss": 0.4555, "step": 8547 }, { "epoch": 2.1635029106555304, "grad_norm": 0.17421559989452362, "learning_rate": 5.635140143686567e-06, "loss": 0.4416, "step": 8548 }, { "epoch": 2.1637560111364214, "grad_norm": 0.1819169670343399, "learning_rate": 5.632986068955371e-06, "loss": 0.4385, "step": 8549 }, { "epoch": 2.164009111617312, "grad_norm": 0.18276339769363403, "learning_rate": 5.630832244575823e-06, "loss": 0.477, "step": 8550 }, { "epoch": 2.164262212098203, "grad_norm": 0.1806134581565857, "learning_rate": 5.628678670671397e-06, "loss": 0.4418, "step": 8551 }, { "epoch": 2.164515312579094, "grad_norm": 0.17703072726726532, "learning_rate": 5.626525347365552e-06, "loss": 0.4763, "step": 8552 }, { "epoch": 2.164768413059985, "grad_norm": 0.17893542349338531, "learning_rate": 5.624372274781728e-06, "loss": 0.4573, "step": 8553 }, { "epoch": 2.165021513540876, "grad_norm": 0.17855961620807648, "learning_rate": 5.622219453043361e-06, "loss": 0.4757, "step": 8554 }, { "epoch": 2.1652746140217665, "grad_norm": 0.18993249535560608, "learning_rate": 5.62006688227386e-06, "loss": 0.4472, "step": 8555 }, { "epoch": 2.1655277145026575, "grad_norm": 0.17846116423606873, "learning_rate": 5.617914562596639e-06, "loss": 0.4633, "step": 8556 }, { "epoch": 2.1657808149835485, "grad_norm": 0.17994104325771332, "learning_rate": 5.615762494135081e-06, "loss": 0.4486, "step": 8557 }, { "epoch": 2.1660339154644395, "grad_norm": 0.18346886336803436, "learning_rate": 5.613610677012562e-06, "loss": 0.4433, "step": 8558 }, { "epoch": 2.1662870159453305, "grad_norm": 0.21810448169708252, "learning_rate": 5.611459111352435e-06, "loss": 0.4579, "step": 8559 }, { "epoch": 2.166540116426221, "grad_norm": 0.17151550948619843, "learning_rate": 5.60930779727805e-06, "loss": 0.4423, "step": 8560 }, { "epoch": 2.166793216907112, "grad_norm": 0.18241733312606812, "learning_rate": 5.6071567349127325e-06, "loss": 0.4773, "step": 8561 }, { "epoch": 2.167046317388003, "grad_norm": 0.17853164672851562, "learning_rate": 5.605005924379798e-06, "loss": 0.4565, "step": 8562 }, { "epoch": 2.167299417868894, "grad_norm": 0.17362406849861145, "learning_rate": 5.6028553658025586e-06, "loss": 0.4432, "step": 8563 }, { "epoch": 2.167552518349785, "grad_norm": 0.1778854876756668, "learning_rate": 5.60070505930429e-06, "loss": 0.4637, "step": 8564 }, { "epoch": 2.1678056188306756, "grad_norm": 0.17393532395362854, "learning_rate": 5.598555005008268e-06, "loss": 0.4434, "step": 8565 }, { "epoch": 2.1680587193115666, "grad_norm": 0.17531448602676392, "learning_rate": 5.596405203037751e-06, "loss": 0.4481, "step": 8566 }, { "epoch": 2.1683118197924576, "grad_norm": 0.1760701835155487, "learning_rate": 5.5942556535159844e-06, "loss": 0.4448, "step": 8567 }, { "epoch": 2.1685649202733486, "grad_norm": 0.17497074604034424, "learning_rate": 5.592106356566194e-06, "loss": 0.4569, "step": 8568 }, { "epoch": 2.1688180207542396, "grad_norm": 0.17305819690227509, "learning_rate": 5.5899573123115955e-06, "loss": 0.462, "step": 8569 }, { "epoch": 2.16907112123513, "grad_norm": 0.1754063069820404, "learning_rate": 5.5878085208753904e-06, "loss": 0.4263, "step": 8570 }, { "epoch": 2.169324221716021, "grad_norm": 0.1755703240633011, "learning_rate": 5.585659982380763e-06, "loss": 0.4562, "step": 8571 }, { "epoch": 2.169577322196912, "grad_norm": 0.17406247556209564, "learning_rate": 5.583511696950885e-06, "loss": 0.4537, "step": 8572 }, { "epoch": 2.169830422677803, "grad_norm": 0.19524787366390228, "learning_rate": 5.581363664708914e-06, "loss": 0.4595, "step": 8573 }, { "epoch": 2.170083523158694, "grad_norm": 0.1797322928905487, "learning_rate": 5.57921588577799e-06, "loss": 0.4545, "step": 8574 }, { "epoch": 2.170336623639585, "grad_norm": 0.1882932037115097, "learning_rate": 5.577068360281241e-06, "loss": 0.4767, "step": 8575 }, { "epoch": 2.1705897241204757, "grad_norm": 0.18165552616119385, "learning_rate": 5.574921088341783e-06, "loss": 0.4656, "step": 8576 }, { "epoch": 2.1708428246013667, "grad_norm": 0.18035072088241577, "learning_rate": 5.572774070082711e-06, "loss": 0.4493, "step": 8577 }, { "epoch": 2.1710959250822577, "grad_norm": 0.17958422005176544, "learning_rate": 5.57062730562711e-06, "loss": 0.4848, "step": 8578 }, { "epoch": 2.1713490255631487, "grad_norm": 0.1775844246149063, "learning_rate": 5.56848079509805e-06, "loss": 0.4534, "step": 8579 }, { "epoch": 2.1716021260440397, "grad_norm": 0.18031223118305206, "learning_rate": 5.566334538618585e-06, "loss": 0.4493, "step": 8580 }, { "epoch": 2.1718552265249302, "grad_norm": 0.18299461901187897, "learning_rate": 5.564188536311759e-06, "loss": 0.4559, "step": 8581 }, { "epoch": 2.1721083270058212, "grad_norm": 0.18298573791980743, "learning_rate": 5.5620427883005926e-06, "loss": 0.4656, "step": 8582 }, { "epoch": 2.1723614274867122, "grad_norm": 0.17964394390583038, "learning_rate": 5.559897294708099e-06, "loss": 0.4545, "step": 8583 }, { "epoch": 2.1726145279676032, "grad_norm": 0.1732099950313568, "learning_rate": 5.557752055657275e-06, "loss": 0.4423, "step": 8584 }, { "epoch": 2.1728676284484942, "grad_norm": 0.18205790221691132, "learning_rate": 5.555607071271102e-06, "loss": 0.4511, "step": 8585 }, { "epoch": 2.173120728929385, "grad_norm": 0.18285396695137024, "learning_rate": 5.553462341672548e-06, "loss": 0.4822, "step": 8586 }, { "epoch": 2.173373829410276, "grad_norm": 0.1846364587545395, "learning_rate": 5.551317866984567e-06, "loss": 0.4589, "step": 8587 }, { "epoch": 2.173626929891167, "grad_norm": 0.17952483892440796, "learning_rate": 5.549173647330095e-06, "loss": 0.4691, "step": 8588 }, { "epoch": 2.173880030372058, "grad_norm": 0.20544421672821045, "learning_rate": 5.547029682832057e-06, "loss": 0.4363, "step": 8589 }, { "epoch": 2.174133130852949, "grad_norm": 0.18087294697761536, "learning_rate": 5.544885973613362e-06, "loss": 0.4368, "step": 8590 }, { "epoch": 2.1743862313338393, "grad_norm": 0.1824961006641388, "learning_rate": 5.5427425197969065e-06, "loss": 0.4683, "step": 8591 }, { "epoch": 2.1746393318147303, "grad_norm": 0.17647616565227509, "learning_rate": 5.540599321505564e-06, "loss": 0.4655, "step": 8592 }, { "epoch": 2.1748924322956213, "grad_norm": 0.17776431143283844, "learning_rate": 5.5384563788622e-06, "loss": 0.4628, "step": 8593 }, { "epoch": 2.1751455327765123, "grad_norm": 0.17459158599376678, "learning_rate": 5.5363136919896695e-06, "loss": 0.4552, "step": 8594 }, { "epoch": 2.1753986332574033, "grad_norm": 0.18055221438407898, "learning_rate": 5.534171261010809e-06, "loss": 0.4592, "step": 8595 }, { "epoch": 2.175651733738294, "grad_norm": 0.1768048256635666, "learning_rate": 5.532029086048436e-06, "loss": 0.4805, "step": 8596 }, { "epoch": 2.175904834219185, "grad_norm": 0.1777184158563614, "learning_rate": 5.529887167225361e-06, "loss": 0.4465, "step": 8597 }, { "epoch": 2.176157934700076, "grad_norm": 0.1758873164653778, "learning_rate": 5.527745504664369e-06, "loss": 0.4292, "step": 8598 }, { "epoch": 2.176411035180967, "grad_norm": 0.17608104646205902, "learning_rate": 5.525604098488241e-06, "loss": 0.442, "step": 8599 }, { "epoch": 2.176664135661858, "grad_norm": 0.17769941687583923, "learning_rate": 5.5234629488197335e-06, "loss": 0.4691, "step": 8600 }, { "epoch": 2.1769172361427485, "grad_norm": 0.17694604396820068, "learning_rate": 5.521322055781604e-06, "loss": 0.4552, "step": 8601 }, { "epoch": 2.1771703366236395, "grad_norm": 0.18399854004383087, "learning_rate": 5.519181419496585e-06, "loss": 0.4413, "step": 8602 }, { "epoch": 2.1774234371045305, "grad_norm": 0.17860867083072662, "learning_rate": 5.517041040087386e-06, "loss": 0.4618, "step": 8603 }, { "epoch": 2.1776765375854215, "grad_norm": 0.17657054960727692, "learning_rate": 5.514900917676715e-06, "loss": 0.4762, "step": 8604 }, { "epoch": 2.1779296380663125, "grad_norm": 0.1905509978532791, "learning_rate": 5.512761052387261e-06, "loss": 0.4583, "step": 8605 }, { "epoch": 2.1781827385472035, "grad_norm": 0.1792985051870346, "learning_rate": 5.510621444341693e-06, "loss": 0.4827, "step": 8606 }, { "epoch": 2.178435839028094, "grad_norm": 0.1793069988489151, "learning_rate": 5.508482093662677e-06, "loss": 0.4402, "step": 8607 }, { "epoch": 2.178688939508985, "grad_norm": 0.2056504786014557, "learning_rate": 5.50634300047286e-06, "loss": 0.4556, "step": 8608 }, { "epoch": 2.178942039989876, "grad_norm": 0.17425134778022766, "learning_rate": 5.504204164894863e-06, "loss": 0.4533, "step": 8609 }, { "epoch": 2.179195140470767, "grad_norm": 0.18514496088027954, "learning_rate": 5.502065587051304e-06, "loss": 0.4473, "step": 8610 }, { "epoch": 2.179448240951658, "grad_norm": 0.19141100347042084, "learning_rate": 5.499927267064784e-06, "loss": 0.4705, "step": 8611 }, { "epoch": 2.1797013414325486, "grad_norm": 0.1839684695005417, "learning_rate": 5.497789205057889e-06, "loss": 0.4373, "step": 8612 }, { "epoch": 2.1799544419134396, "grad_norm": 0.1775808185338974, "learning_rate": 5.495651401153187e-06, "loss": 0.4631, "step": 8613 }, { "epoch": 2.1802075423943306, "grad_norm": 0.17576681077480316, "learning_rate": 5.493513855473236e-06, "loss": 0.4473, "step": 8614 }, { "epoch": 2.1804606428752216, "grad_norm": 0.17866292595863342, "learning_rate": 5.491376568140576e-06, "loss": 0.4449, "step": 8615 }, { "epoch": 2.1807137433561126, "grad_norm": 0.17954111099243164, "learning_rate": 5.489239539277733e-06, "loss": 0.4681, "step": 8616 }, { "epoch": 2.180966843837003, "grad_norm": 0.183744415640831, "learning_rate": 5.48710276900722e-06, "loss": 0.4498, "step": 8617 }, { "epoch": 2.181219944317894, "grad_norm": 0.18260036408901215, "learning_rate": 5.484966257451532e-06, "loss": 0.4272, "step": 8618 }, { "epoch": 2.181473044798785, "grad_norm": 0.18774524331092834, "learning_rate": 5.4828300047331505e-06, "loss": 0.4559, "step": 8619 }, { "epoch": 2.181726145279676, "grad_norm": 0.17268237471580505, "learning_rate": 5.4806940109745436e-06, "loss": 0.461, "step": 8620 }, { "epoch": 2.181979245760567, "grad_norm": 0.18103137612342834, "learning_rate": 5.478558276298163e-06, "loss": 0.4907, "step": 8621 }, { "epoch": 2.1822323462414577, "grad_norm": 0.17750117182731628, "learning_rate": 5.476422800826443e-06, "loss": 0.4549, "step": 8622 }, { "epoch": 2.1824854467223487, "grad_norm": 0.1825050711631775, "learning_rate": 5.47428758468181e-06, "loss": 0.4554, "step": 8623 }, { "epoch": 2.1827385472032397, "grad_norm": 0.17626477777957916, "learning_rate": 5.472152627986669e-06, "loss": 0.4838, "step": 8624 }, { "epoch": 2.1829916476841307, "grad_norm": 0.17641779780387878, "learning_rate": 5.4700179308634135e-06, "loss": 0.4632, "step": 8625 }, { "epoch": 2.1832447481650217, "grad_norm": 0.1808522343635559, "learning_rate": 5.46788349343442e-06, "loss": 0.4542, "step": 8626 }, { "epoch": 2.1834978486459122, "grad_norm": 0.27046695351600647, "learning_rate": 5.465749315822053e-06, "loss": 0.478, "step": 8627 }, { "epoch": 2.1837509491268032, "grad_norm": 0.18287654221057892, "learning_rate": 5.463615398148659e-06, "loss": 0.4593, "step": 8628 }, { "epoch": 2.1840040496076942, "grad_norm": 0.17767629027366638, "learning_rate": 5.461481740536572e-06, "loss": 0.4744, "step": 8629 }, { "epoch": 2.1842571500885852, "grad_norm": 0.18406671285629272, "learning_rate": 5.459348343108113e-06, "loss": 0.4816, "step": 8630 }, { "epoch": 2.1845102505694762, "grad_norm": 0.18139377236366272, "learning_rate": 5.4572152059855745e-06, "loss": 0.4626, "step": 8631 }, { "epoch": 2.184763351050367, "grad_norm": 0.17415836453437805, "learning_rate": 5.455082329291255e-06, "loss": 0.4312, "step": 8632 }, { "epoch": 2.185016451531258, "grad_norm": 0.1831323206424713, "learning_rate": 5.452949713147425e-06, "loss": 0.439, "step": 8633 }, { "epoch": 2.185269552012149, "grad_norm": 0.1744723618030548, "learning_rate": 5.450817357676344e-06, "loss": 0.4714, "step": 8634 }, { "epoch": 2.18552265249304, "grad_norm": 0.17735889554023743, "learning_rate": 5.448685263000255e-06, "loss": 0.46, "step": 8635 }, { "epoch": 2.185775752973931, "grad_norm": 0.17814186215400696, "learning_rate": 5.446553429241383e-06, "loss": 0.4698, "step": 8636 }, { "epoch": 2.186028853454822, "grad_norm": 0.17645569145679474, "learning_rate": 5.44442185652194e-06, "loss": 0.4727, "step": 8637 }, { "epoch": 2.1862819539357123, "grad_norm": 0.18160204589366913, "learning_rate": 5.442290544964133e-06, "loss": 0.4596, "step": 8638 }, { "epoch": 2.1865350544166033, "grad_norm": 0.1723727434873581, "learning_rate": 5.44015949469014e-06, "loss": 0.4637, "step": 8639 }, { "epoch": 2.1867881548974943, "grad_norm": 0.17637726664543152, "learning_rate": 5.4380287058221316e-06, "loss": 0.4583, "step": 8640 }, { "epoch": 2.1870412553783853, "grad_norm": 0.18096163868904114, "learning_rate": 5.435898178482263e-06, "loss": 0.4542, "step": 8641 }, { "epoch": 2.1872943558592763, "grad_norm": 0.19763672351837158, "learning_rate": 5.433767912792667e-06, "loss": 0.4422, "step": 8642 }, { "epoch": 2.187547456340167, "grad_norm": 0.17655248939990997, "learning_rate": 5.431637908875471e-06, "loss": 0.4536, "step": 8643 }, { "epoch": 2.187800556821058, "grad_norm": 0.18993693590164185, "learning_rate": 5.429508166852778e-06, "loss": 0.444, "step": 8644 }, { "epoch": 2.188053657301949, "grad_norm": 0.17681972682476044, "learning_rate": 5.4273786868466915e-06, "loss": 0.4623, "step": 8645 }, { "epoch": 2.18830675778284, "grad_norm": 0.17781604826450348, "learning_rate": 5.425249468979286e-06, "loss": 0.4687, "step": 8646 }, { "epoch": 2.188559858263731, "grad_norm": 0.1800556480884552, "learning_rate": 5.423120513372626e-06, "loss": 0.4414, "step": 8647 }, { "epoch": 2.1888129587446215, "grad_norm": 0.17827315628528595, "learning_rate": 5.420991820148756e-06, "loss": 0.4566, "step": 8648 }, { "epoch": 2.1890660592255125, "grad_norm": 0.18012003600597382, "learning_rate": 5.418863389429712e-06, "loss": 0.4554, "step": 8649 }, { "epoch": 2.1893191597064035, "grad_norm": 0.17622306942939758, "learning_rate": 5.416735221337508e-06, "loss": 0.4356, "step": 8650 }, { "epoch": 2.1895722601872945, "grad_norm": 0.1766544133424759, "learning_rate": 5.4146073159941534e-06, "loss": 0.4467, "step": 8651 }, { "epoch": 2.1898253606681854, "grad_norm": 0.1821022480726242, "learning_rate": 5.4124796735216416e-06, "loss": 0.4575, "step": 8652 }, { "epoch": 2.190078461149076, "grad_norm": 0.17887713015079498, "learning_rate": 5.410352294041934e-06, "loss": 0.4753, "step": 8653 }, { "epoch": 2.190331561629967, "grad_norm": 0.17474187910556793, "learning_rate": 5.408225177676993e-06, "loss": 0.4687, "step": 8654 }, { "epoch": 2.190584662110858, "grad_norm": 0.1778896003961563, "learning_rate": 5.406098324548762e-06, "loss": 0.4199, "step": 8655 }, { "epoch": 2.190837762591749, "grad_norm": 0.1834876537322998, "learning_rate": 5.40397173477917e-06, "loss": 0.4453, "step": 8656 }, { "epoch": 2.19109086307264, "grad_norm": 0.17739155888557434, "learning_rate": 5.401845408490125e-06, "loss": 0.4395, "step": 8657 }, { "epoch": 2.1913439635535306, "grad_norm": 0.1737797111272812, "learning_rate": 5.3997193458035365e-06, "loss": 0.4355, "step": 8658 }, { "epoch": 2.1915970640344216, "grad_norm": 0.17526301741600037, "learning_rate": 5.397593546841276e-06, "loss": 0.4578, "step": 8659 }, { "epoch": 2.1918501645153126, "grad_norm": 0.1716025471687317, "learning_rate": 5.395468011725216e-06, "loss": 0.4665, "step": 8660 }, { "epoch": 2.1921032649962036, "grad_norm": 0.17998316884040833, "learning_rate": 5.393342740577206e-06, "loss": 0.4382, "step": 8661 }, { "epoch": 2.1923563654770946, "grad_norm": 0.17419536411762238, "learning_rate": 5.391217733519084e-06, "loss": 0.4193, "step": 8662 }, { "epoch": 2.192609465957985, "grad_norm": 0.18050573766231537, "learning_rate": 5.389092990672674e-06, "loss": 0.4495, "step": 8663 }, { "epoch": 2.192862566438876, "grad_norm": 0.1740347295999527, "learning_rate": 5.386968512159782e-06, "loss": 0.4464, "step": 8664 }, { "epoch": 2.193115666919767, "grad_norm": 0.17823611199855804, "learning_rate": 5.3848442981022005e-06, "loss": 0.4493, "step": 8665 }, { "epoch": 2.193368767400658, "grad_norm": 0.17850053310394287, "learning_rate": 5.382720348621704e-06, "loss": 0.4479, "step": 8666 }, { "epoch": 2.193621867881549, "grad_norm": 0.17936362326145172, "learning_rate": 5.380596663840056e-06, "loss": 0.4447, "step": 8667 }, { "epoch": 2.19387496836244, "grad_norm": 0.17897064983844757, "learning_rate": 5.378473243879003e-06, "loss": 0.4628, "step": 8668 }, { "epoch": 2.1941280688433307, "grad_norm": 0.1845979243516922, "learning_rate": 5.376350088860276e-06, "loss": 0.4612, "step": 8669 }, { "epoch": 2.1943811693242217, "grad_norm": 0.18232430517673492, "learning_rate": 5.374227198905591e-06, "loss": 0.4447, "step": 8670 }, { "epoch": 2.1946342698051127, "grad_norm": 0.17330050468444824, "learning_rate": 5.372104574136646e-06, "loss": 0.4499, "step": 8671 }, { "epoch": 2.1948873702860037, "grad_norm": 0.17978473007678986, "learning_rate": 5.3699822146751305e-06, "loss": 0.4753, "step": 8672 }, { "epoch": 2.1951404707668942, "grad_norm": 0.17920728027820587, "learning_rate": 5.367860120642712e-06, "loss": 0.4577, "step": 8673 }, { "epoch": 2.1953935712477852, "grad_norm": 0.18062840402126312, "learning_rate": 5.365738292161051e-06, "loss": 0.4602, "step": 8674 }, { "epoch": 2.1956466717286762, "grad_norm": 0.18354123830795288, "learning_rate": 5.363616729351777e-06, "loss": 0.4721, "step": 8675 }, { "epoch": 2.1958997722095672, "grad_norm": 0.17517070472240448, "learning_rate": 5.361495432336523e-06, "loss": 0.4597, "step": 8676 }, { "epoch": 2.1961528726904582, "grad_norm": 0.1839073896408081, "learning_rate": 5.359374401236899e-06, "loss": 0.467, "step": 8677 }, { "epoch": 2.1964059731713492, "grad_norm": 0.18293210864067078, "learning_rate": 5.357253636174494e-06, "loss": 0.466, "step": 8678 }, { "epoch": 2.19665907365224, "grad_norm": 0.17342276871204376, "learning_rate": 5.355133137270891e-06, "loss": 0.4626, "step": 8679 }, { "epoch": 2.196912174133131, "grad_norm": 0.1820446252822876, "learning_rate": 5.353012904647656e-06, "loss": 0.4439, "step": 8680 }, { "epoch": 2.197165274614022, "grad_norm": 0.17582736909389496, "learning_rate": 5.350892938426326e-06, "loss": 0.4418, "step": 8681 }, { "epoch": 2.197418375094913, "grad_norm": 0.17611609399318695, "learning_rate": 5.348773238728447e-06, "loss": 0.4484, "step": 8682 }, { "epoch": 2.197671475575804, "grad_norm": 0.19478529691696167, "learning_rate": 5.34665380567553e-06, "loss": 0.4687, "step": 8683 }, { "epoch": 2.1979245760566943, "grad_norm": 0.17899513244628906, "learning_rate": 5.3445346393890806e-06, "loss": 0.4642, "step": 8684 }, { "epoch": 2.1981776765375853, "grad_norm": 0.21987289190292358, "learning_rate": 5.3424157399905876e-06, "loss": 0.452, "step": 8685 }, { "epoch": 2.1984307770184763, "grad_norm": 0.1824129968881607, "learning_rate": 5.340297107601517e-06, "loss": 0.4416, "step": 8686 }, { "epoch": 2.1986838774993673, "grad_norm": 0.1742730289697647, "learning_rate": 5.33817874234333e-06, "loss": 0.4503, "step": 8687 }, { "epoch": 2.1989369779802583, "grad_norm": 0.18182289600372314, "learning_rate": 5.336060644337462e-06, "loss": 0.4735, "step": 8688 }, { "epoch": 2.199190078461149, "grad_norm": 0.18424570560455322, "learning_rate": 5.333942813705347e-06, "loss": 0.4788, "step": 8689 }, { "epoch": 2.19944317894204, "grad_norm": 0.18135061860084534, "learning_rate": 5.331825250568394e-06, "loss": 0.4518, "step": 8690 }, { "epoch": 2.199696279422931, "grad_norm": 0.17732568085193634, "learning_rate": 5.329707955048e-06, "loss": 0.4291, "step": 8691 }, { "epoch": 2.199949379903822, "grad_norm": 0.18519949913024902, "learning_rate": 5.327590927265539e-06, "loss": 0.4571, "step": 8692 }, { "epoch": 2.200202480384713, "grad_norm": 0.17606642842292786, "learning_rate": 5.325474167342379e-06, "loss": 0.4579, "step": 8693 }, { "epoch": 2.2004555808656034, "grad_norm": 0.1786988079547882, "learning_rate": 5.323357675399865e-06, "loss": 0.4622, "step": 8694 }, { "epoch": 2.2007086813464944, "grad_norm": 0.17501519620418549, "learning_rate": 5.321241451559338e-06, "loss": 0.4624, "step": 8695 }, { "epoch": 2.2009617818273854, "grad_norm": 0.1832471787929535, "learning_rate": 5.319125495942119e-06, "loss": 0.4616, "step": 8696 }, { "epoch": 2.2012148823082764, "grad_norm": 0.1773667186498642, "learning_rate": 5.317009808669502e-06, "loss": 0.4399, "step": 8697 }, { "epoch": 2.2014679827891674, "grad_norm": 0.17751538753509521, "learning_rate": 5.314894389862778e-06, "loss": 0.4573, "step": 8698 }, { "epoch": 2.2017210832700584, "grad_norm": 0.1833050400018692, "learning_rate": 5.312779239643221e-06, "loss": 0.4722, "step": 8699 }, { "epoch": 2.201974183750949, "grad_norm": 0.17650684714317322, "learning_rate": 5.3106643581320875e-06, "loss": 0.4362, "step": 8700 }, { "epoch": 2.20222728423184, "grad_norm": 0.17973077297210693, "learning_rate": 5.308549745450615e-06, "loss": 0.4457, "step": 8701 }, { "epoch": 2.202480384712731, "grad_norm": 0.1864868700504303, "learning_rate": 5.306435401720043e-06, "loss": 0.4532, "step": 8702 }, { "epoch": 2.202733485193622, "grad_norm": 0.18713629245758057, "learning_rate": 5.304321327061568e-06, "loss": 0.4839, "step": 8703 }, { "epoch": 2.2029865856745126, "grad_norm": 0.181891068816185, "learning_rate": 5.3022075215963894e-06, "loss": 0.4886, "step": 8704 }, { "epoch": 2.2032396861554036, "grad_norm": 0.17514269053936005, "learning_rate": 5.300093985445691e-06, "loss": 0.4248, "step": 8705 }, { "epoch": 2.2034927866362946, "grad_norm": 0.1754041165113449, "learning_rate": 5.297980718730633e-06, "loss": 0.4739, "step": 8706 }, { "epoch": 2.2037458871171856, "grad_norm": 0.1817319393157959, "learning_rate": 5.295867721572368e-06, "loss": 0.4566, "step": 8707 }, { "epoch": 2.2039989875980766, "grad_norm": 0.17698954045772552, "learning_rate": 5.293754994092026e-06, "loss": 0.4787, "step": 8708 }, { "epoch": 2.2042520880789676, "grad_norm": 0.18216721713542938, "learning_rate": 5.291642536410728e-06, "loss": 0.4536, "step": 8709 }, { "epoch": 2.204505188559858, "grad_norm": 0.17444442212581635, "learning_rate": 5.2895303486495755e-06, "loss": 0.4432, "step": 8710 }, { "epoch": 2.204758289040749, "grad_norm": 0.18514083325862885, "learning_rate": 5.2874184309296565e-06, "loss": 0.4493, "step": 8711 }, { "epoch": 2.20501138952164, "grad_norm": 0.18126091361045837, "learning_rate": 5.285306783372042e-06, "loss": 0.4315, "step": 8712 }, { "epoch": 2.205264490002531, "grad_norm": 0.1781802624464035, "learning_rate": 5.283195406097788e-06, "loss": 0.4668, "step": 8713 }, { "epoch": 2.205517590483422, "grad_norm": 0.17667821049690247, "learning_rate": 5.281084299227937e-06, "loss": 0.4545, "step": 8714 }, { "epoch": 2.2057706909643127, "grad_norm": 0.1801941990852356, "learning_rate": 5.278973462883512e-06, "loss": 0.439, "step": 8715 }, { "epoch": 2.2060237914452037, "grad_norm": 0.1773417592048645, "learning_rate": 5.2768628971855254e-06, "loss": 0.4464, "step": 8716 }, { "epoch": 2.2062768919260947, "grad_norm": 0.1780572235584259, "learning_rate": 5.274752602254969e-06, "loss": 0.4592, "step": 8717 }, { "epoch": 2.2065299924069857, "grad_norm": 0.1819145381450653, "learning_rate": 5.272642578212823e-06, "loss": 0.4505, "step": 8718 }, { "epoch": 2.2067830928878767, "grad_norm": 0.18362043797969818, "learning_rate": 5.27053282518005e-06, "loss": 0.4605, "step": 8719 }, { "epoch": 2.207036193368767, "grad_norm": 0.17678986489772797, "learning_rate": 5.268423343277598e-06, "loss": 0.4479, "step": 8720 }, { "epoch": 2.207289293849658, "grad_norm": 0.18069294095039368, "learning_rate": 5.266314132626398e-06, "loss": 0.4454, "step": 8721 }, { "epoch": 2.207542394330549, "grad_norm": 0.1823817640542984, "learning_rate": 5.264205193347368e-06, "loss": 0.4565, "step": 8722 }, { "epoch": 2.20779549481144, "grad_norm": 0.1787911355495453, "learning_rate": 5.262096525561408e-06, "loss": 0.4561, "step": 8723 }, { "epoch": 2.208048595292331, "grad_norm": 0.17995597422122955, "learning_rate": 5.259988129389408e-06, "loss": 0.4647, "step": 8724 }, { "epoch": 2.2083016957732218, "grad_norm": 0.1915629804134369, "learning_rate": 5.257880004952227e-06, "loss": 0.4818, "step": 8725 }, { "epoch": 2.2085547962541128, "grad_norm": 0.18093286454677582, "learning_rate": 5.2557721523707306e-06, "loss": 0.4585, "step": 8726 }, { "epoch": 2.2088078967350038, "grad_norm": 0.17968818545341492, "learning_rate": 5.2536645717657505e-06, "loss": 0.4767, "step": 8727 }, { "epoch": 2.2090609972158948, "grad_norm": 0.18368031084537506, "learning_rate": 5.251557263258113e-06, "loss": 0.4449, "step": 8728 }, { "epoch": 2.2093140976967858, "grad_norm": 0.17551620304584503, "learning_rate": 5.249450226968626e-06, "loss": 0.4497, "step": 8729 }, { "epoch": 2.2095671981776768, "grad_norm": 0.17990852892398834, "learning_rate": 5.247343463018085e-06, "loss": 0.4677, "step": 8730 }, { "epoch": 2.2098202986585673, "grad_norm": 0.17555415630340576, "learning_rate": 5.2452369715272566e-06, "loss": 0.454, "step": 8731 }, { "epoch": 2.2100733991394583, "grad_norm": 0.1741637885570526, "learning_rate": 5.243130752616903e-06, "loss": 0.4603, "step": 8732 }, { "epoch": 2.2103264996203493, "grad_norm": 0.18030598759651184, "learning_rate": 5.241024806407778e-06, "loss": 0.4744, "step": 8733 }, { "epoch": 2.2105796001012403, "grad_norm": 0.18238721787929535, "learning_rate": 5.238919133020604e-06, "loss": 0.4447, "step": 8734 }, { "epoch": 2.210832700582131, "grad_norm": 0.17711004614830017, "learning_rate": 5.2368137325761e-06, "loss": 0.4511, "step": 8735 }, { "epoch": 2.211085801063022, "grad_norm": 0.18035955727100372, "learning_rate": 5.234708605194959e-06, "loss": 0.4431, "step": 8736 }, { "epoch": 2.211338901543913, "grad_norm": 0.17837701737880707, "learning_rate": 5.232603750997864e-06, "loss": 0.4592, "step": 8737 }, { "epoch": 2.211592002024804, "grad_norm": 0.17892952263355255, "learning_rate": 5.2304991701054856e-06, "loss": 0.4815, "step": 8738 }, { "epoch": 2.211845102505695, "grad_norm": 0.18043093383312225, "learning_rate": 5.228394862638466e-06, "loss": 0.4587, "step": 8739 }, { "epoch": 2.212098202986586, "grad_norm": 0.1769050657749176, "learning_rate": 5.226290828717451e-06, "loss": 0.4546, "step": 8740 }, { "epoch": 2.2123513034674764, "grad_norm": 0.1839023381471634, "learning_rate": 5.224187068463061e-06, "loss": 0.4613, "step": 8741 }, { "epoch": 2.2126044039483674, "grad_norm": 0.1789177656173706, "learning_rate": 5.222083581995892e-06, "loss": 0.4434, "step": 8742 }, { "epoch": 2.2128575044292584, "grad_norm": 0.1822754442691803, "learning_rate": 5.219980369436537e-06, "loss": 0.457, "step": 8743 }, { "epoch": 2.2131106049101494, "grad_norm": 0.17768584191799164, "learning_rate": 5.217877430905567e-06, "loss": 0.4804, "step": 8744 }, { "epoch": 2.2133637053910404, "grad_norm": 0.18408367037773132, "learning_rate": 5.215774766523536e-06, "loss": 0.4619, "step": 8745 }, { "epoch": 2.213616805871931, "grad_norm": 0.18444867432117462, "learning_rate": 5.213672376410997e-06, "loss": 0.4783, "step": 8746 }, { "epoch": 2.213869906352822, "grad_norm": 0.17041972279548645, "learning_rate": 5.211570260688464e-06, "loss": 0.4326, "step": 8747 }, { "epoch": 2.214123006833713, "grad_norm": 0.18079231679439545, "learning_rate": 5.209468419476449e-06, "loss": 0.4568, "step": 8748 }, { "epoch": 2.214376107314604, "grad_norm": 0.1798410415649414, "learning_rate": 5.20736685289545e-06, "loss": 0.431, "step": 8749 }, { "epoch": 2.214629207795495, "grad_norm": 0.17420445382595062, "learning_rate": 5.205265561065942e-06, "loss": 0.4484, "step": 8750 }, { "epoch": 2.2148823082763855, "grad_norm": 0.1854090392589569, "learning_rate": 5.203164544108388e-06, "loss": 0.4488, "step": 8751 }, { "epoch": 2.2151354087572765, "grad_norm": 0.17520229518413544, "learning_rate": 5.201063802143236e-06, "loss": 0.4552, "step": 8752 }, { "epoch": 2.2153885092381675, "grad_norm": 0.18381448090076447, "learning_rate": 5.198963335290916e-06, "loss": 0.4548, "step": 8753 }, { "epoch": 2.2156416097190585, "grad_norm": 0.17894090712070465, "learning_rate": 5.1968631436718445e-06, "loss": 0.4382, "step": 8754 }, { "epoch": 2.2158947101999495, "grad_norm": 0.18172939121723175, "learning_rate": 5.1947632274064195e-06, "loss": 0.4365, "step": 8755 }, { "epoch": 2.21614781068084, "grad_norm": 0.17940956354141235, "learning_rate": 5.192663586615026e-06, "loss": 0.4425, "step": 8756 }, { "epoch": 2.216400911161731, "grad_norm": 0.17857718467712402, "learning_rate": 5.1905642214180315e-06, "loss": 0.4761, "step": 8757 }, { "epoch": 2.216654011642622, "grad_norm": 0.188728466629982, "learning_rate": 5.1884651319357895e-06, "loss": 0.4504, "step": 8758 }, { "epoch": 2.216907112123513, "grad_norm": 0.17825071513652802, "learning_rate": 5.186366318288633e-06, "loss": 0.4661, "step": 8759 }, { "epoch": 2.217160212604404, "grad_norm": 0.21525892615318298, "learning_rate": 5.1842677805968856e-06, "loss": 0.4453, "step": 8760 }, { "epoch": 2.2174133130852947, "grad_norm": 0.17867907881736755, "learning_rate": 5.1821695189808505e-06, "loss": 0.4426, "step": 8761 }, { "epoch": 2.2176664135661857, "grad_norm": 0.1762372851371765, "learning_rate": 5.180071533560818e-06, "loss": 0.441, "step": 8762 }, { "epoch": 2.2179195140470767, "grad_norm": 0.1776033639907837, "learning_rate": 5.17797382445706e-06, "loss": 0.4536, "step": 8763 }, { "epoch": 2.2181726145279677, "grad_norm": 0.228244349360466, "learning_rate": 5.175876391789834e-06, "loss": 0.443, "step": 8764 }, { "epoch": 2.2184257150088587, "grad_norm": 0.18043790757656097, "learning_rate": 5.17377923567938e-06, "loss": 0.4428, "step": 8765 }, { "epoch": 2.218678815489749, "grad_norm": 0.1830926090478897, "learning_rate": 5.171682356245926e-06, "loss": 0.447, "step": 8766 }, { "epoch": 2.21893191597064, "grad_norm": 0.17692451179027557, "learning_rate": 5.169585753609681e-06, "loss": 0.4476, "step": 8767 }, { "epoch": 2.219185016451531, "grad_norm": 0.17668373882770538, "learning_rate": 5.1674894278908414e-06, "loss": 0.4549, "step": 8768 }, { "epoch": 2.219438116932422, "grad_norm": 0.17769384384155273, "learning_rate": 5.1653933792095775e-06, "loss": 0.4544, "step": 8769 }, { "epoch": 2.219691217413313, "grad_norm": 0.17622432112693787, "learning_rate": 5.163297607686053e-06, "loss": 0.467, "step": 8770 }, { "epoch": 2.219944317894204, "grad_norm": 0.181500643491745, "learning_rate": 5.16120211344042e-06, "loss": 0.4793, "step": 8771 }, { "epoch": 2.2201974183750948, "grad_norm": 0.18235333263874054, "learning_rate": 5.159106896592806e-06, "loss": 0.4663, "step": 8772 }, { "epoch": 2.2204505188559858, "grad_norm": 0.17912815511226654, "learning_rate": 5.157011957263325e-06, "loss": 0.4574, "step": 8773 }, { "epoch": 2.2207036193368768, "grad_norm": 0.17551209032535553, "learning_rate": 5.154917295572079e-06, "loss": 0.4353, "step": 8774 }, { "epoch": 2.2209567198177678, "grad_norm": 0.1757105141878128, "learning_rate": 5.152822911639143e-06, "loss": 0.4482, "step": 8775 }, { "epoch": 2.2212098202986588, "grad_norm": 0.17892348766326904, "learning_rate": 5.150728805584584e-06, "loss": 0.4261, "step": 8776 }, { "epoch": 2.2214629207795493, "grad_norm": 0.18077152967453003, "learning_rate": 5.148634977528458e-06, "loss": 0.4514, "step": 8777 }, { "epoch": 2.2217160212604403, "grad_norm": 0.18202809989452362, "learning_rate": 5.1465414275908e-06, "loss": 0.4677, "step": 8778 }, { "epoch": 2.2219691217413313, "grad_norm": 0.1793474555015564, "learning_rate": 5.1444481558916255e-06, "loss": 0.4464, "step": 8779 }, { "epoch": 2.2222222222222223, "grad_norm": 0.17362961173057556, "learning_rate": 5.142355162550942e-06, "loss": 0.4253, "step": 8780 }, { "epoch": 2.2224753227031133, "grad_norm": 0.17595618963241577, "learning_rate": 5.1402624476887305e-06, "loss": 0.4506, "step": 8781 }, { "epoch": 2.222728423184004, "grad_norm": 0.18092553317546844, "learning_rate": 5.138170011424963e-06, "loss": 0.4318, "step": 8782 }, { "epoch": 2.222981523664895, "grad_norm": 0.18468903005123138, "learning_rate": 5.136077853879593e-06, "loss": 0.4605, "step": 8783 }, { "epoch": 2.223234624145786, "grad_norm": 0.1815265715122223, "learning_rate": 5.133985975172564e-06, "loss": 0.4409, "step": 8784 }, { "epoch": 2.223487724626677, "grad_norm": 0.1766311079263687, "learning_rate": 5.131894375423802e-06, "loss": 0.4846, "step": 8785 }, { "epoch": 2.223740825107568, "grad_norm": 0.1793510615825653, "learning_rate": 5.129803054753205e-06, "loss": 0.4712, "step": 8786 }, { "epoch": 2.2239939255884584, "grad_norm": 0.1771101951599121, "learning_rate": 5.127712013280668e-06, "loss": 0.4583, "step": 8787 }, { "epoch": 2.2242470260693494, "grad_norm": 0.18130049109458923, "learning_rate": 5.125621251126066e-06, "loss": 0.4601, "step": 8788 }, { "epoch": 2.2245001265502404, "grad_norm": 0.1814970076084137, "learning_rate": 5.123530768409254e-06, "loss": 0.4367, "step": 8789 }, { "epoch": 2.2247532270311314, "grad_norm": 0.18533822894096375, "learning_rate": 5.121440565250082e-06, "loss": 0.5045, "step": 8790 }, { "epoch": 2.2250063275120224, "grad_norm": 0.17547939717769623, "learning_rate": 5.119350641768379e-06, "loss": 0.4582, "step": 8791 }, { "epoch": 2.225259427992913, "grad_norm": 0.19379007816314697, "learning_rate": 5.117260998083946e-06, "loss": 0.4279, "step": 8792 }, { "epoch": 2.225512528473804, "grad_norm": 0.1775178462266922, "learning_rate": 5.115171634316583e-06, "loss": 0.4613, "step": 8793 }, { "epoch": 2.225765628954695, "grad_norm": 0.17699193954467773, "learning_rate": 5.113082550586068e-06, "loss": 0.4388, "step": 8794 }, { "epoch": 2.226018729435586, "grad_norm": 0.1761421114206314, "learning_rate": 5.110993747012165e-06, "loss": 0.4732, "step": 8795 }, { "epoch": 2.226271829916477, "grad_norm": 0.1807161569595337, "learning_rate": 5.1089052237146175e-06, "loss": 0.449, "step": 8796 }, { "epoch": 2.2265249303973675, "grad_norm": 0.18451085686683655, "learning_rate": 5.106816980813161e-06, "loss": 0.446, "step": 8797 }, { "epoch": 2.2267780308782585, "grad_norm": 0.17437253892421722, "learning_rate": 5.104729018427506e-06, "loss": 0.438, "step": 8798 }, { "epoch": 2.2270311313591495, "grad_norm": 0.18455001711845398, "learning_rate": 5.102641336677352e-06, "loss": 0.4843, "step": 8799 }, { "epoch": 2.2272842318400405, "grad_norm": 0.18019716441631317, "learning_rate": 5.100553935682382e-06, "loss": 0.4788, "step": 8800 }, { "epoch": 2.2275373323209315, "grad_norm": 0.19276300072669983, "learning_rate": 5.098466815562262e-06, "loss": 0.4759, "step": 8801 }, { "epoch": 2.2277904328018225, "grad_norm": 0.17896059155464172, "learning_rate": 5.096379976436641e-06, "loss": 0.4805, "step": 8802 }, { "epoch": 2.228043533282713, "grad_norm": 0.18231141567230225, "learning_rate": 5.094293418425156e-06, "loss": 0.4623, "step": 8803 }, { "epoch": 2.228296633763604, "grad_norm": 0.18359088897705078, "learning_rate": 5.092207141647422e-06, "loss": 0.4699, "step": 8804 }, { "epoch": 2.228549734244495, "grad_norm": 0.17894047498703003, "learning_rate": 5.090121146223042e-06, "loss": 0.4511, "step": 8805 }, { "epoch": 2.228802834725386, "grad_norm": 0.1834474354982376, "learning_rate": 5.088035432271602e-06, "loss": 0.4572, "step": 8806 }, { "epoch": 2.229055935206277, "grad_norm": 0.17726927995681763, "learning_rate": 5.085949999912672e-06, "loss": 0.4676, "step": 8807 }, { "epoch": 2.2293090356871677, "grad_norm": 0.18394780158996582, "learning_rate": 5.083864849265803e-06, "loss": 0.4433, "step": 8808 }, { "epoch": 2.2295621361680587, "grad_norm": 0.1816289871931076, "learning_rate": 5.081779980450534e-06, "loss": 0.4624, "step": 8809 }, { "epoch": 2.2298152366489497, "grad_norm": 0.18466739356517792, "learning_rate": 5.079695393586387e-06, "loss": 0.4533, "step": 8810 }, { "epoch": 2.2300683371298406, "grad_norm": 0.18084923923015594, "learning_rate": 5.077611088792865e-06, "loss": 0.4459, "step": 8811 }, { "epoch": 2.2303214376107316, "grad_norm": 0.18929049372673035, "learning_rate": 5.075527066189458e-06, "loss": 0.4655, "step": 8812 }, { "epoch": 2.230574538091622, "grad_norm": 0.1781688928604126, "learning_rate": 5.073443325895642e-06, "loss": 0.439, "step": 8813 }, { "epoch": 2.230827638572513, "grad_norm": 0.1781722754240036, "learning_rate": 5.071359868030863e-06, "loss": 0.4343, "step": 8814 }, { "epoch": 2.231080739053404, "grad_norm": 0.19957421720027924, "learning_rate": 5.069276692714571e-06, "loss": 0.4469, "step": 8815 }, { "epoch": 2.231333839534295, "grad_norm": 0.1723962277173996, "learning_rate": 5.067193800066188e-06, "loss": 0.4283, "step": 8816 }, { "epoch": 2.231586940015186, "grad_norm": 0.1772090047597885, "learning_rate": 5.0651111902051185e-06, "loss": 0.463, "step": 8817 }, { "epoch": 2.2318400404960768, "grad_norm": 0.17641112208366394, "learning_rate": 5.063028863250763e-06, "loss": 0.4388, "step": 8818 }, { "epoch": 2.2320931409769678, "grad_norm": 0.18094980716705322, "learning_rate": 5.060946819322484e-06, "loss": 0.4674, "step": 8819 }, { "epoch": 2.2323462414578588, "grad_norm": 0.17882631719112396, "learning_rate": 5.058865058539644e-06, "loss": 0.4535, "step": 8820 }, { "epoch": 2.2325993419387498, "grad_norm": 0.18018923699855804, "learning_rate": 5.0567835810215935e-06, "loss": 0.4474, "step": 8821 }, { "epoch": 2.2328524424196408, "grad_norm": 0.17735256254673004, "learning_rate": 5.054702386887653e-06, "loss": 0.4462, "step": 8822 }, { "epoch": 2.2331055429005313, "grad_norm": 0.1791037917137146, "learning_rate": 5.052621476257136e-06, "loss": 0.4616, "step": 8823 }, { "epoch": 2.2333586433814223, "grad_norm": 0.18308795988559723, "learning_rate": 5.0505408492493365e-06, "loss": 0.4547, "step": 8824 }, { "epoch": 2.2336117438623133, "grad_norm": 0.17727816104888916, "learning_rate": 5.04846050598353e-06, "loss": 0.4262, "step": 8825 }, { "epoch": 2.2338648443432043, "grad_norm": 0.18310388922691345, "learning_rate": 5.046380446578978e-06, "loss": 0.4373, "step": 8826 }, { "epoch": 2.2341179448240953, "grad_norm": 0.1807837039232254, "learning_rate": 5.044300671154925e-06, "loss": 0.4615, "step": 8827 }, { "epoch": 2.234371045304986, "grad_norm": 0.17896810173988342, "learning_rate": 5.042221179830604e-06, "loss": 0.4617, "step": 8828 }, { "epoch": 2.234624145785877, "grad_norm": 0.1746690422296524, "learning_rate": 5.04014197272523e-06, "loss": 0.4369, "step": 8829 }, { "epoch": 2.234877246266768, "grad_norm": 0.1818046271800995, "learning_rate": 5.038063049957993e-06, "loss": 0.4266, "step": 8830 }, { "epoch": 2.235130346747659, "grad_norm": 0.18162813782691956, "learning_rate": 5.035984411648075e-06, "loss": 0.4516, "step": 8831 }, { "epoch": 2.23538344722855, "grad_norm": 0.17667408287525177, "learning_rate": 5.033906057914642e-06, "loss": 0.4662, "step": 8832 }, { "epoch": 2.235636547709441, "grad_norm": 0.1735842078924179, "learning_rate": 5.031827988876835e-06, "loss": 0.4536, "step": 8833 }, { "epoch": 2.2358896481903314, "grad_norm": 0.1784031242132187, "learning_rate": 5.029750204653795e-06, "loss": 0.4343, "step": 8834 }, { "epoch": 2.2361427486712224, "grad_norm": 0.18249687552452087, "learning_rate": 5.027672705364636e-06, "loss": 0.4493, "step": 8835 }, { "epoch": 2.2363958491521134, "grad_norm": 0.17585699260234833, "learning_rate": 5.025595491128451e-06, "loss": 0.4554, "step": 8836 }, { "epoch": 2.2366489496330044, "grad_norm": 0.1837971806526184, "learning_rate": 5.023518562064323e-06, "loss": 0.4544, "step": 8837 }, { "epoch": 2.2369020501138954, "grad_norm": 0.17806090414524078, "learning_rate": 5.021441918291321e-06, "loss": 0.4269, "step": 8838 }, { "epoch": 2.237155150594786, "grad_norm": 0.19798335433006287, "learning_rate": 5.019365559928493e-06, "loss": 0.4761, "step": 8839 }, { "epoch": 2.237408251075677, "grad_norm": 0.19383622705936432, "learning_rate": 5.017289487094868e-06, "loss": 0.4647, "step": 8840 }, { "epoch": 2.237661351556568, "grad_norm": 0.18008051812648773, "learning_rate": 5.015213699909473e-06, "loss": 0.4399, "step": 8841 }, { "epoch": 2.237914452037459, "grad_norm": 0.18033795058727264, "learning_rate": 5.0131381984913005e-06, "loss": 0.4475, "step": 8842 }, { "epoch": 2.23816755251835, "grad_norm": 0.18588951230049133, "learning_rate": 5.011062982959337e-06, "loss": 0.4804, "step": 8843 }, { "epoch": 2.2384206529992405, "grad_norm": 0.17733289301395416, "learning_rate": 5.00898805343255e-06, "loss": 0.4542, "step": 8844 }, { "epoch": 2.2386737534801315, "grad_norm": 0.17948096990585327, "learning_rate": 5.00691341002989e-06, "loss": 0.4474, "step": 8845 }, { "epoch": 2.2389268539610225, "grad_norm": 0.17746026813983917, "learning_rate": 5.004839052870294e-06, "loss": 0.4452, "step": 8846 }, { "epoch": 2.2391799544419135, "grad_norm": 0.17696553468704224, "learning_rate": 5.002764982072678e-06, "loss": 0.4739, "step": 8847 }, { "epoch": 2.2394330549228045, "grad_norm": 0.18194669485092163, "learning_rate": 5.000691197755944e-06, "loss": 0.4481, "step": 8848 }, { "epoch": 2.239686155403695, "grad_norm": 0.17794343829154968, "learning_rate": 4.9986177000389814e-06, "loss": 0.4616, "step": 8849 }, { "epoch": 2.239939255884586, "grad_norm": 0.1767064481973648, "learning_rate": 4.996544489040655e-06, "loss": 0.4458, "step": 8850 }, { "epoch": 2.240192356365477, "grad_norm": 0.17756076157093048, "learning_rate": 4.994471564879821e-06, "loss": 0.4509, "step": 8851 }, { "epoch": 2.240445456846368, "grad_norm": 0.1768740713596344, "learning_rate": 4.992398927675314e-06, "loss": 0.4486, "step": 8852 }, { "epoch": 2.240698557327259, "grad_norm": 0.17742422223091125, "learning_rate": 4.990326577545953e-06, "loss": 0.4429, "step": 8853 }, { "epoch": 2.2409516578081496, "grad_norm": 0.17935128509998322, "learning_rate": 4.988254514610543e-06, "loss": 0.4199, "step": 8854 }, { "epoch": 2.2412047582890406, "grad_norm": 0.17568516731262207, "learning_rate": 4.986182738987871e-06, "loss": 0.4384, "step": 8855 }, { "epoch": 2.2414578587699316, "grad_norm": 0.18933092057704926, "learning_rate": 4.984111250796707e-06, "loss": 0.4758, "step": 8856 }, { "epoch": 2.2417109592508226, "grad_norm": 0.17780806124210358, "learning_rate": 4.9820400501558085e-06, "loss": 0.4294, "step": 8857 }, { "epoch": 2.2419640597317136, "grad_norm": 0.18259677290916443, "learning_rate": 4.979969137183902e-06, "loss": 0.4493, "step": 8858 }, { "epoch": 2.242217160212604, "grad_norm": 0.18346858024597168, "learning_rate": 4.97789851199972e-06, "loss": 0.4529, "step": 8859 }, { "epoch": 2.242470260693495, "grad_norm": 0.17705008387565613, "learning_rate": 4.975828174721964e-06, "loss": 0.4512, "step": 8860 }, { "epoch": 2.242723361174386, "grad_norm": 0.18560412526130676, "learning_rate": 4.97375812546932e-06, "loss": 0.4466, "step": 8861 }, { "epoch": 2.242976461655277, "grad_norm": 0.18176090717315674, "learning_rate": 4.9716883643604616e-06, "loss": 0.4515, "step": 8862 }, { "epoch": 2.243229562136168, "grad_norm": 0.17871837317943573, "learning_rate": 4.969618891514046e-06, "loss": 0.4487, "step": 8863 }, { "epoch": 2.243482662617059, "grad_norm": 0.1772594451904297, "learning_rate": 4.967549707048702e-06, "loss": 0.4607, "step": 8864 }, { "epoch": 2.2437357630979498, "grad_norm": 0.18283075094223022, "learning_rate": 4.965480811083062e-06, "loss": 0.4456, "step": 8865 }, { "epoch": 2.2439888635788408, "grad_norm": 0.1829938143491745, "learning_rate": 4.963412203735728e-06, "loss": 0.4487, "step": 8866 }, { "epoch": 2.2442419640597318, "grad_norm": 0.18093478679656982, "learning_rate": 4.961343885125288e-06, "loss": 0.4654, "step": 8867 }, { "epoch": 2.2444950645406228, "grad_norm": 0.17930111289024353, "learning_rate": 4.959275855370318e-06, "loss": 0.4279, "step": 8868 }, { "epoch": 2.2447481650215133, "grad_norm": 0.19462960958480835, "learning_rate": 4.957208114589368e-06, "loss": 0.4872, "step": 8869 }, { "epoch": 2.2450012655024043, "grad_norm": 0.18657249212265015, "learning_rate": 4.955140662900979e-06, "loss": 0.4674, "step": 8870 }, { "epoch": 2.2452543659832953, "grad_norm": 0.17767925560474396, "learning_rate": 4.953073500423671e-06, "loss": 0.4445, "step": 8871 }, { "epoch": 2.2455074664641863, "grad_norm": 0.17804113030433655, "learning_rate": 4.9510066272759575e-06, "loss": 0.4475, "step": 8872 }, { "epoch": 2.2457605669450773, "grad_norm": 0.17771689593791962, "learning_rate": 4.948940043576323e-06, "loss": 0.4609, "step": 8873 }, { "epoch": 2.2460136674259683, "grad_norm": 0.1798359453678131, "learning_rate": 4.946873749443245e-06, "loss": 0.4374, "step": 8874 }, { "epoch": 2.246266767906859, "grad_norm": 0.18787401914596558, "learning_rate": 4.944807744995174e-06, "loss": 0.4612, "step": 8875 }, { "epoch": 2.24651986838775, "grad_norm": 0.18311575055122375, "learning_rate": 4.94274203035055e-06, "loss": 0.4781, "step": 8876 }, { "epoch": 2.246772968868641, "grad_norm": 0.1778571605682373, "learning_rate": 4.940676605627794e-06, "loss": 0.4514, "step": 8877 }, { "epoch": 2.247026069349532, "grad_norm": 0.20085136592388153, "learning_rate": 4.93861147094532e-06, "loss": 0.4532, "step": 8878 }, { "epoch": 2.247279169830423, "grad_norm": 0.18321239948272705, "learning_rate": 4.936546626421519e-06, "loss": 0.4446, "step": 8879 }, { "epoch": 2.2475322703113134, "grad_norm": 0.17365166544914246, "learning_rate": 4.934482072174754e-06, "loss": 0.4278, "step": 8880 }, { "epoch": 2.2477853707922044, "grad_norm": 0.18072587251663208, "learning_rate": 4.932417808323388e-06, "loss": 0.4476, "step": 8881 }, { "epoch": 2.2480384712730954, "grad_norm": 0.1786397099494934, "learning_rate": 4.930353834985757e-06, "loss": 0.4168, "step": 8882 }, { "epoch": 2.2482915717539864, "grad_norm": 0.1901056468486786, "learning_rate": 4.928290152280188e-06, "loss": 0.4667, "step": 8883 }, { "epoch": 2.2485446722348774, "grad_norm": 0.18105754256248474, "learning_rate": 4.926226760324984e-06, "loss": 0.424, "step": 8884 }, { "epoch": 2.248797772715768, "grad_norm": 0.18525201082229614, "learning_rate": 4.924163659238442e-06, "loss": 0.453, "step": 8885 }, { "epoch": 2.249050873196659, "grad_norm": 0.1863795667886734, "learning_rate": 4.9221008491388286e-06, "loss": 0.4214, "step": 8886 }, { "epoch": 2.24930397367755, "grad_norm": 0.1861030012369156, "learning_rate": 4.920038330144401e-06, "loss": 0.4513, "step": 8887 }, { "epoch": 2.249557074158441, "grad_norm": 0.1738097220659256, "learning_rate": 4.917976102373401e-06, "loss": 0.4471, "step": 8888 }, { "epoch": 2.249810174639332, "grad_norm": 0.1856554001569748, "learning_rate": 4.91591416594405e-06, "loss": 0.446, "step": 8889 }, { "epoch": 2.2500632751202225, "grad_norm": 0.1821039766073227, "learning_rate": 4.913852520974556e-06, "loss": 0.4624, "step": 8890 }, { "epoch": 2.2503163756011135, "grad_norm": 0.18117450177669525, "learning_rate": 4.911791167583107e-06, "loss": 0.4415, "step": 8891 }, { "epoch": 2.2505694760820045, "grad_norm": 0.17789581418037415, "learning_rate": 4.909730105887878e-06, "loss": 0.4509, "step": 8892 }, { "epoch": 2.2508225765628955, "grad_norm": 0.19153495132923126, "learning_rate": 4.907669336007024e-06, "loss": 0.4631, "step": 8893 }, { "epoch": 2.2510756770437865, "grad_norm": 0.18006931245326996, "learning_rate": 4.905608858058685e-06, "loss": 0.4563, "step": 8894 }, { "epoch": 2.2513287775246775, "grad_norm": 0.17878636717796326, "learning_rate": 4.903548672160983e-06, "loss": 0.4509, "step": 8895 }, { "epoch": 2.251581878005568, "grad_norm": 0.17811869084835052, "learning_rate": 4.901488778432025e-06, "loss": 0.4393, "step": 8896 }, { "epoch": 2.251834978486459, "grad_norm": 0.17320677638053894, "learning_rate": 4.8994291769899e-06, "loss": 0.4482, "step": 8897 }, { "epoch": 2.25208807896735, "grad_norm": 0.1816738098859787, "learning_rate": 4.897369867952682e-06, "loss": 0.5274, "step": 8898 }, { "epoch": 2.252341179448241, "grad_norm": 0.18311111629009247, "learning_rate": 4.895310851438425e-06, "loss": 0.4576, "step": 8899 }, { "epoch": 2.2525942799291316, "grad_norm": 0.17628252506256104, "learning_rate": 4.893252127565168e-06, "loss": 0.4288, "step": 8900 }, { "epoch": 2.2528473804100226, "grad_norm": 0.1809850037097931, "learning_rate": 4.891193696450939e-06, "loss": 0.4523, "step": 8901 }, { "epoch": 2.2531004808909136, "grad_norm": 0.18255341053009033, "learning_rate": 4.889135558213729e-06, "loss": 0.449, "step": 8902 }, { "epoch": 2.2533535813718046, "grad_norm": 0.1863453984260559, "learning_rate": 4.887077712971541e-06, "loss": 0.4712, "step": 8903 }, { "epoch": 2.2536066818526956, "grad_norm": 0.18206307291984558, "learning_rate": 4.8850201608423405e-06, "loss": 0.4678, "step": 8904 }, { "epoch": 2.2538597823335866, "grad_norm": 0.17986738681793213, "learning_rate": 4.882962901944086e-06, "loss": 0.467, "step": 8905 }, { "epoch": 2.254112882814477, "grad_norm": 0.17555083334445953, "learning_rate": 4.880905936394713e-06, "loss": 0.4305, "step": 8906 }, { "epoch": 2.254365983295368, "grad_norm": 0.17749454081058502, "learning_rate": 4.8788492643121475e-06, "loss": 0.4325, "step": 8907 }, { "epoch": 2.254619083776259, "grad_norm": 0.18543067574501038, "learning_rate": 4.876792885814283e-06, "loss": 0.4507, "step": 8908 }, { "epoch": 2.25487218425715, "grad_norm": 0.17612245678901672, "learning_rate": 4.8747368010190185e-06, "loss": 0.4298, "step": 8909 }, { "epoch": 2.255125284738041, "grad_norm": 0.18105903267860413, "learning_rate": 4.872681010044222e-06, "loss": 0.4647, "step": 8910 }, { "epoch": 2.2553783852189317, "grad_norm": 0.18149277567863464, "learning_rate": 4.870625513007746e-06, "loss": 0.4336, "step": 8911 }, { "epoch": 2.2556314856998227, "grad_norm": 0.18120911717414856, "learning_rate": 4.86857031002743e-06, "loss": 0.4602, "step": 8912 }, { "epoch": 2.2558845861807137, "grad_norm": 0.18144652247428894, "learning_rate": 4.866515401221096e-06, "loss": 0.4645, "step": 8913 }, { "epoch": 2.2561376866616047, "grad_norm": 0.17699703574180603, "learning_rate": 4.8644607867065394e-06, "loss": 0.4428, "step": 8914 }, { "epoch": 2.2563907871424957, "grad_norm": 0.19175153970718384, "learning_rate": 4.86240646660155e-06, "loss": 0.4585, "step": 8915 }, { "epoch": 2.2566438876233863, "grad_norm": 0.17940855026245117, "learning_rate": 4.860352441023903e-06, "loss": 0.435, "step": 8916 }, { "epoch": 2.2568969881042773, "grad_norm": 0.18174362182617188, "learning_rate": 4.8582987100913494e-06, "loss": 0.4612, "step": 8917 }, { "epoch": 2.2571500885851683, "grad_norm": 0.18453457951545715, "learning_rate": 4.856245273921627e-06, "loss": 0.4516, "step": 8918 }, { "epoch": 2.2574031890660593, "grad_norm": 0.17682817578315735, "learning_rate": 4.854192132632447e-06, "loss": 0.4481, "step": 8919 }, { "epoch": 2.2576562895469503, "grad_norm": 0.18407021462917328, "learning_rate": 4.852139286341519e-06, "loss": 0.4767, "step": 8920 }, { "epoch": 2.257909390027841, "grad_norm": 0.18151253461837769, "learning_rate": 4.8500867351665235e-06, "loss": 0.4665, "step": 8921 }, { "epoch": 2.258162490508732, "grad_norm": 0.18186788260936737, "learning_rate": 4.848034479225128e-06, "loss": 0.4579, "step": 8922 }, { "epoch": 2.258415590989623, "grad_norm": 0.174190953373909, "learning_rate": 4.845982518634992e-06, "loss": 0.4579, "step": 8923 }, { "epoch": 2.258668691470514, "grad_norm": 0.18127062916755676, "learning_rate": 4.843930853513748e-06, "loss": 0.4697, "step": 8924 }, { "epoch": 2.258921791951405, "grad_norm": 0.18246690928936005, "learning_rate": 4.841879483979007e-06, "loss": 0.4304, "step": 8925 }, { "epoch": 2.259174892432296, "grad_norm": 0.1734320968389511, "learning_rate": 4.839828410148374e-06, "loss": 0.4476, "step": 8926 }, { "epoch": 2.2594279929131864, "grad_norm": 0.18030042946338654, "learning_rate": 4.8377776321394325e-06, "loss": 0.455, "step": 8927 }, { "epoch": 2.2596810933940774, "grad_norm": 0.1783810257911682, "learning_rate": 4.8357271500697444e-06, "loss": 0.4496, "step": 8928 }, { "epoch": 2.2599341938749684, "grad_norm": 0.177655428647995, "learning_rate": 4.833676964056872e-06, "loss": 0.4668, "step": 8929 }, { "epoch": 2.2601872943558594, "grad_norm": 0.1799294501543045, "learning_rate": 4.831627074218338e-06, "loss": 0.4334, "step": 8930 }, { "epoch": 2.26044039483675, "grad_norm": 0.18096613883972168, "learning_rate": 4.829577480671659e-06, "loss": 0.454, "step": 8931 }, { "epoch": 2.260693495317641, "grad_norm": 0.1847909390926361, "learning_rate": 4.827528183534336e-06, "loss": 0.4925, "step": 8932 }, { "epoch": 2.260946595798532, "grad_norm": 0.18073521554470062, "learning_rate": 4.825479182923849e-06, "loss": 0.47, "step": 8933 }, { "epoch": 2.261199696279423, "grad_norm": 0.17785798013210297, "learning_rate": 4.823430478957665e-06, "loss": 0.4609, "step": 8934 }, { "epoch": 2.261452796760314, "grad_norm": 0.18942703306674957, "learning_rate": 4.82138207175323e-06, "loss": 0.4725, "step": 8935 }, { "epoch": 2.261705897241205, "grad_norm": 0.19346246123313904, "learning_rate": 4.8193339614279765e-06, "loss": 0.5045, "step": 8936 }, { "epoch": 2.2619589977220955, "grad_norm": 0.18994303047657013, "learning_rate": 4.817286148099317e-06, "loss": 0.4476, "step": 8937 }, { "epoch": 2.2622120982029865, "grad_norm": 0.18212217092514038, "learning_rate": 4.815238631884649e-06, "loss": 0.4367, "step": 8938 }, { "epoch": 2.2624651986838775, "grad_norm": 0.17918868362903595, "learning_rate": 4.813191412901352e-06, "loss": 0.4462, "step": 8939 }, { "epoch": 2.2627182991647685, "grad_norm": 0.17896613478660583, "learning_rate": 4.811144491266789e-06, "loss": 0.429, "step": 8940 }, { "epoch": 2.2629713996456595, "grad_norm": 0.1837330311536789, "learning_rate": 4.809097867098306e-06, "loss": 0.4707, "step": 8941 }, { "epoch": 2.26322450012655, "grad_norm": 0.18227232992649078, "learning_rate": 4.807051540513231e-06, "loss": 0.4424, "step": 8942 }, { "epoch": 2.263477600607441, "grad_norm": 0.17880427837371826, "learning_rate": 4.805005511628875e-06, "loss": 0.4487, "step": 8943 }, { "epoch": 2.263730701088332, "grad_norm": 0.1734803169965744, "learning_rate": 4.802959780562534e-06, "loss": 0.4151, "step": 8944 }, { "epoch": 2.263983801569223, "grad_norm": 0.18541178107261658, "learning_rate": 4.800914347431483e-06, "loss": 0.4631, "step": 8945 }, { "epoch": 2.264236902050114, "grad_norm": 0.18229331076145172, "learning_rate": 4.798869212352985e-06, "loss": 0.4689, "step": 8946 }, { "epoch": 2.2644900025310046, "grad_norm": 0.17733101546764374, "learning_rate": 4.796824375444284e-06, "loss": 0.4492, "step": 8947 }, { "epoch": 2.2647431030118956, "grad_norm": 0.18836723268032074, "learning_rate": 4.7947798368226015e-06, "loss": 0.4486, "step": 8948 }, { "epoch": 2.2649962034927866, "grad_norm": 0.1802581399679184, "learning_rate": 4.792735596605151e-06, "loss": 0.4242, "step": 8949 }, { "epoch": 2.2652493039736776, "grad_norm": 0.20232856273651123, "learning_rate": 4.790691654909122e-06, "loss": 0.4665, "step": 8950 }, { "epoch": 2.2655024044545686, "grad_norm": 0.17812390625476837, "learning_rate": 4.788648011851694e-06, "loss": 0.4547, "step": 8951 }, { "epoch": 2.265755504935459, "grad_norm": 0.2062910497188568, "learning_rate": 4.786604667550018e-06, "loss": 0.4521, "step": 8952 }, { "epoch": 2.26600860541635, "grad_norm": 0.175776407122612, "learning_rate": 4.784561622121233e-06, "loss": 0.4489, "step": 8953 }, { "epoch": 2.266261705897241, "grad_norm": 0.1808004379272461, "learning_rate": 4.782518875682472e-06, "loss": 0.4553, "step": 8954 }, { "epoch": 2.266514806378132, "grad_norm": 0.1755741834640503, "learning_rate": 4.780476428350834e-06, "loss": 0.4566, "step": 8955 }, { "epoch": 2.266767906859023, "grad_norm": 0.1800907552242279, "learning_rate": 4.778434280243412e-06, "loss": 0.4315, "step": 8956 }, { "epoch": 2.267021007339914, "grad_norm": 0.18246451020240784, "learning_rate": 4.77639243147728e-06, "loss": 0.4722, "step": 8957 }, { "epoch": 2.2672741078208047, "grad_norm": 0.1787421852350235, "learning_rate": 4.774350882169486e-06, "loss": 0.4461, "step": 8958 }, { "epoch": 2.2675272083016957, "grad_norm": 0.1789620965719223, "learning_rate": 4.7723096324370675e-06, "loss": 0.4365, "step": 8959 }, { "epoch": 2.2677803087825867, "grad_norm": 0.1804952621459961, "learning_rate": 4.770268682397051e-06, "loss": 0.4581, "step": 8960 }, { "epoch": 2.2680334092634777, "grad_norm": 0.18360207974910736, "learning_rate": 4.768228032166439e-06, "loss": 0.4573, "step": 8961 }, { "epoch": 2.2682865097443683, "grad_norm": 0.1813296228647232, "learning_rate": 4.7661876818622184e-06, "loss": 0.4568, "step": 8962 }, { "epoch": 2.2685396102252593, "grad_norm": 0.1841687709093094, "learning_rate": 4.764147631601354e-06, "loss": 0.4517, "step": 8963 }, { "epoch": 2.2687927107061503, "grad_norm": 0.1781996637582779, "learning_rate": 4.762107881500797e-06, "loss": 0.4266, "step": 8964 }, { "epoch": 2.2690458111870413, "grad_norm": 0.17541168630123138, "learning_rate": 4.760068431677486e-06, "loss": 0.4334, "step": 8965 }, { "epoch": 2.2692989116679323, "grad_norm": 0.21232225000858307, "learning_rate": 4.758029282248332e-06, "loss": 0.4505, "step": 8966 }, { "epoch": 2.2695520121488233, "grad_norm": 0.1821281760931015, "learning_rate": 4.755990433330244e-06, "loss": 0.4663, "step": 8967 }, { "epoch": 2.269805112629714, "grad_norm": 0.18712440133094788, "learning_rate": 4.753951885040105e-06, "loss": 0.459, "step": 8968 }, { "epoch": 2.270058213110605, "grad_norm": 0.1830027848482132, "learning_rate": 4.75191363749477e-06, "loss": 0.4675, "step": 8969 }, { "epoch": 2.270311313591496, "grad_norm": 0.18353688716888428, "learning_rate": 4.7498756908110965e-06, "loss": 0.4498, "step": 8970 }, { "epoch": 2.270564414072387, "grad_norm": 0.19131840765476227, "learning_rate": 4.747838045105912e-06, "loss": 0.4438, "step": 8971 }, { "epoch": 2.2708175145532774, "grad_norm": 0.1848333775997162, "learning_rate": 4.745800700496026e-06, "loss": 0.4596, "step": 8972 }, { "epoch": 2.2710706150341684, "grad_norm": 0.3651049733161926, "learning_rate": 4.743763657098246e-06, "loss": 0.4554, "step": 8973 }, { "epoch": 2.2713237155150594, "grad_norm": 0.18539033830165863, "learning_rate": 4.741726915029349e-06, "loss": 0.4625, "step": 8974 }, { "epoch": 2.2715768159959504, "grad_norm": 0.17696842551231384, "learning_rate": 4.73969047440609e-06, "loss": 0.4413, "step": 8975 }, { "epoch": 2.2718299164768414, "grad_norm": 0.17631933093070984, "learning_rate": 4.737654335345217e-06, "loss": 0.4312, "step": 8976 }, { "epoch": 2.2720830169577324, "grad_norm": 0.1830076277256012, "learning_rate": 4.73561849796346e-06, "loss": 0.4524, "step": 8977 }, { "epoch": 2.272336117438623, "grad_norm": 0.18737167119979858, "learning_rate": 4.7335829623775265e-06, "loss": 0.4463, "step": 8978 }, { "epoch": 2.272589217919514, "grad_norm": 0.18130896985530853, "learning_rate": 4.73154772870411e-06, "loss": 0.4582, "step": 8979 }, { "epoch": 2.272842318400405, "grad_norm": 0.1828499436378479, "learning_rate": 4.729512797059888e-06, "loss": 0.4485, "step": 8980 }, { "epoch": 2.273095418881296, "grad_norm": 0.1922316998243332, "learning_rate": 4.727478167561515e-06, "loss": 0.4459, "step": 8981 }, { "epoch": 2.273348519362187, "grad_norm": 0.1809653490781784, "learning_rate": 4.725443840325636e-06, "loss": 0.4515, "step": 8982 }, { "epoch": 2.2736016198430775, "grad_norm": 0.17535094916820526, "learning_rate": 4.7234098154688735e-06, "loss": 0.4333, "step": 8983 }, { "epoch": 2.2738547203239685, "grad_norm": 0.1911088079214096, "learning_rate": 4.721376093107832e-06, "loss": 0.4656, "step": 8984 }, { "epoch": 2.2741078208048595, "grad_norm": 0.18769654631614685, "learning_rate": 4.719342673359102e-06, "loss": 0.4475, "step": 8985 }, { "epoch": 2.2743609212857505, "grad_norm": 0.19135110080242157, "learning_rate": 4.717309556339255e-06, "loss": 0.4514, "step": 8986 }, { "epoch": 2.2746140217666415, "grad_norm": 0.18000848591327667, "learning_rate": 4.715276742164846e-06, "loss": 0.4473, "step": 8987 }, { "epoch": 2.2748671222475325, "grad_norm": 0.17851220071315765, "learning_rate": 4.71324423095241e-06, "loss": 0.4424, "step": 8988 }, { "epoch": 2.275120222728423, "grad_norm": 0.18219783902168274, "learning_rate": 4.71121202281847e-06, "loss": 0.4416, "step": 8989 }, { "epoch": 2.275373323209314, "grad_norm": 0.18181926012039185, "learning_rate": 4.709180117879524e-06, "loss": 0.447, "step": 8990 }, { "epoch": 2.275626423690205, "grad_norm": 0.1737246811389923, "learning_rate": 4.707148516252059e-06, "loss": 0.4477, "step": 8991 }, { "epoch": 2.275879524171096, "grad_norm": 0.17570196092128754, "learning_rate": 4.705117218052543e-06, "loss": 0.4697, "step": 8992 }, { "epoch": 2.2761326246519866, "grad_norm": 0.18748004734516144, "learning_rate": 4.703086223397424e-06, "loss": 0.4795, "step": 8993 }, { "epoch": 2.2763857251328776, "grad_norm": 0.18247976899147034, "learning_rate": 4.701055532403137e-06, "loss": 0.4433, "step": 8994 }, { "epoch": 2.2766388256137686, "grad_norm": 0.18778769671916962, "learning_rate": 4.699025145186094e-06, "loss": 0.4599, "step": 8995 }, { "epoch": 2.2768919260946596, "grad_norm": 0.19669921696186066, "learning_rate": 4.6969950618627e-06, "loss": 0.4713, "step": 8996 }, { "epoch": 2.2771450265755506, "grad_norm": 0.17592206597328186, "learning_rate": 4.694965282549323e-06, "loss": 0.4549, "step": 8997 }, { "epoch": 2.2773981270564416, "grad_norm": 0.17793115973472595, "learning_rate": 4.6929358073623365e-06, "loss": 0.4474, "step": 8998 }, { "epoch": 2.277651227537332, "grad_norm": 0.17925697565078735, "learning_rate": 4.690906636418082e-06, "loss": 0.445, "step": 8999 }, { "epoch": 2.277904328018223, "grad_norm": 0.18008992075920105, "learning_rate": 4.6888777698328895e-06, "loss": 0.4494, "step": 9000 }, { "epoch": 2.278157428499114, "grad_norm": 0.174732968211174, "learning_rate": 4.68684920772307e-06, "loss": 0.4276, "step": 9001 }, { "epoch": 2.278410528980005, "grad_norm": 0.18035274744033813, "learning_rate": 4.684820950204912e-06, "loss": 0.4745, "step": 9002 }, { "epoch": 2.2786636294608957, "grad_norm": 0.17873699963092804, "learning_rate": 4.6827929973946904e-06, "loss": 0.4474, "step": 9003 }, { "epoch": 2.2789167299417867, "grad_norm": 0.1778581738471985, "learning_rate": 4.680765349408671e-06, "loss": 0.4448, "step": 9004 }, { "epoch": 2.2791698304226777, "grad_norm": 0.1887514889240265, "learning_rate": 4.6787380063630915e-06, "loss": 0.4695, "step": 9005 }, { "epoch": 2.2794229309035687, "grad_norm": 0.17921096086502075, "learning_rate": 4.676710968374174e-06, "loss": 0.4754, "step": 9006 }, { "epoch": 2.2796760313844597, "grad_norm": 0.18458589911460876, "learning_rate": 4.674684235558128e-06, "loss": 0.4608, "step": 9007 }, { "epoch": 2.2799291318653507, "grad_norm": 0.1883394867181778, "learning_rate": 4.672657808031135e-06, "loss": 0.4492, "step": 9008 }, { "epoch": 2.2801822323462413, "grad_norm": 0.1823904812335968, "learning_rate": 4.670631685909369e-06, "loss": 0.4485, "step": 9009 }, { "epoch": 2.2804353328271323, "grad_norm": 0.18061649799346924, "learning_rate": 4.6686058693089795e-06, "loss": 0.4697, "step": 9010 }, { "epoch": 2.2806884333080233, "grad_norm": 0.18067745864391327, "learning_rate": 4.666580358346111e-06, "loss": 0.4294, "step": 9011 }, { "epoch": 2.2809415337889143, "grad_norm": 0.17991229891777039, "learning_rate": 4.66455515313688e-06, "loss": 0.4374, "step": 9012 }, { "epoch": 2.2811946342698053, "grad_norm": 0.17943687736988068, "learning_rate": 4.662530253797382e-06, "loss": 0.438, "step": 9013 }, { "epoch": 2.281447734750696, "grad_norm": 0.176185742020607, "learning_rate": 4.660505660443701e-06, "loss": 0.4328, "step": 9014 }, { "epoch": 2.281700835231587, "grad_norm": 0.18224774301052094, "learning_rate": 4.658481373191904e-06, "loss": 0.4787, "step": 9015 }, { "epoch": 2.281953935712478, "grad_norm": 0.18011386692523956, "learning_rate": 4.656457392158037e-06, "loss": 0.4481, "step": 9016 }, { "epoch": 2.282207036193369, "grad_norm": 0.18837420642375946, "learning_rate": 4.654433717458135e-06, "loss": 0.4536, "step": 9017 }, { "epoch": 2.28246013667426, "grad_norm": 0.19027292728424072, "learning_rate": 4.652410349208213e-06, "loss": 0.4402, "step": 9018 }, { "epoch": 2.282713237155151, "grad_norm": 0.17527158558368683, "learning_rate": 4.650387287524258e-06, "loss": 0.4125, "step": 9019 }, { "epoch": 2.2829663376360414, "grad_norm": 0.18260499835014343, "learning_rate": 4.6483645325222525e-06, "loss": 0.4494, "step": 9020 }, { "epoch": 2.2832194381169324, "grad_norm": 0.18453764915466309, "learning_rate": 4.646342084318156e-06, "loss": 0.441, "step": 9021 }, { "epoch": 2.2834725385978234, "grad_norm": 0.18800610303878784, "learning_rate": 4.644319943027912e-06, "loss": 0.4579, "step": 9022 }, { "epoch": 2.2837256390787144, "grad_norm": 0.17279265820980072, "learning_rate": 4.642298108767445e-06, "loss": 0.4518, "step": 9023 }, { "epoch": 2.283978739559605, "grad_norm": 0.18671512603759766, "learning_rate": 4.640276581652661e-06, "loss": 0.4567, "step": 9024 }, { "epoch": 2.284231840040496, "grad_norm": 0.18209034204483032, "learning_rate": 4.638255361799452e-06, "loss": 0.4874, "step": 9025 }, { "epoch": 2.284484940521387, "grad_norm": 0.20432208478450775, "learning_rate": 4.63623444932369e-06, "loss": 0.4745, "step": 9026 }, { "epoch": 2.284738041002278, "grad_norm": 0.18372300267219543, "learning_rate": 4.634213844341229e-06, "loss": 0.4265, "step": 9027 }, { "epoch": 2.284991141483169, "grad_norm": 0.1834096759557724, "learning_rate": 4.632193546967906e-06, "loss": 0.4681, "step": 9028 }, { "epoch": 2.28524424196406, "grad_norm": 0.1823003888130188, "learning_rate": 4.6301735573195405e-06, "loss": 0.4344, "step": 9029 }, { "epoch": 2.2854973424449505, "grad_norm": 0.18881678581237793, "learning_rate": 4.6281538755119345e-06, "loss": 0.4422, "step": 9030 }, { "epoch": 2.2857504429258415, "grad_norm": 0.18022942543029785, "learning_rate": 4.626134501660871e-06, "loss": 0.4291, "step": 9031 }, { "epoch": 2.2860035434067325, "grad_norm": 0.17673566937446594, "learning_rate": 4.624115435882118e-06, "loss": 0.4393, "step": 9032 }, { "epoch": 2.2862566438876235, "grad_norm": 0.17966878414154053, "learning_rate": 4.6220966782914244e-06, "loss": 0.4632, "step": 9033 }, { "epoch": 2.286509744368514, "grad_norm": 0.18074971437454224, "learning_rate": 4.620078229004519e-06, "loss": 0.4617, "step": 9034 }, { "epoch": 2.286762844849405, "grad_norm": 0.18320412933826447, "learning_rate": 4.618060088137118e-06, "loss": 0.468, "step": 9035 }, { "epoch": 2.287015945330296, "grad_norm": 0.21719491481781006, "learning_rate": 4.616042255804913e-06, "loss": 0.4438, "step": 9036 }, { "epoch": 2.287269045811187, "grad_norm": 0.20070548355579376, "learning_rate": 4.614024732123588e-06, "loss": 0.4293, "step": 9037 }, { "epoch": 2.287522146292078, "grad_norm": 0.18231317400932312, "learning_rate": 4.612007517208799e-06, "loss": 0.4481, "step": 9038 }, { "epoch": 2.287775246772969, "grad_norm": 0.18275532126426697, "learning_rate": 4.609990611176189e-06, "loss": 0.4397, "step": 9039 }, { "epoch": 2.2880283472538596, "grad_norm": 0.18015937507152557, "learning_rate": 4.607974014141387e-06, "loss": 0.4525, "step": 9040 }, { "epoch": 2.2882814477347506, "grad_norm": 0.18651239573955536, "learning_rate": 4.60595772621999e-06, "loss": 0.4368, "step": 9041 }, { "epoch": 2.2885345482156416, "grad_norm": 0.2085961401462555, "learning_rate": 4.603941747527599e-06, "loss": 0.4622, "step": 9042 }, { "epoch": 2.2887876486965326, "grad_norm": 0.1853904277086258, "learning_rate": 4.601926078179782e-06, "loss": 0.4804, "step": 9043 }, { "epoch": 2.2890407491774236, "grad_norm": 0.17608554661273956, "learning_rate": 4.599910718292091e-06, "loss": 0.4582, "step": 9044 }, { "epoch": 2.289293849658314, "grad_norm": 0.1802775263786316, "learning_rate": 4.597895667980064e-06, "loss": 0.4786, "step": 9045 }, { "epoch": 2.289546950139205, "grad_norm": 0.18450823426246643, "learning_rate": 4.595880927359223e-06, "loss": 0.4508, "step": 9046 }, { "epoch": 2.289800050620096, "grad_norm": 0.18105743825435638, "learning_rate": 4.593866496545058e-06, "loss": 0.4344, "step": 9047 }, { "epoch": 2.290053151100987, "grad_norm": 0.17951272428035736, "learning_rate": 4.5918523756530605e-06, "loss": 0.4686, "step": 9048 }, { "epoch": 2.290306251581878, "grad_norm": 0.17924746870994568, "learning_rate": 4.589838564798696e-06, "loss": 0.4376, "step": 9049 }, { "epoch": 2.2905593520627687, "grad_norm": 0.17977429926395416, "learning_rate": 4.587825064097411e-06, "loss": 0.4653, "step": 9050 }, { "epoch": 2.2908124525436597, "grad_norm": 0.18084610998630524, "learning_rate": 4.5858118736646375e-06, "loss": 0.4558, "step": 9051 }, { "epoch": 2.2910655530245507, "grad_norm": 0.1790851503610611, "learning_rate": 4.58379899361578e-06, "loss": 0.4576, "step": 9052 }, { "epoch": 2.2913186535054417, "grad_norm": 0.17925719916820526, "learning_rate": 4.581786424066238e-06, "loss": 0.4589, "step": 9053 }, { "epoch": 2.2915717539863327, "grad_norm": 0.18166010081768036, "learning_rate": 4.579774165131383e-06, "loss": 0.4754, "step": 9054 }, { "epoch": 2.2918248544672233, "grad_norm": 0.17770490050315857, "learning_rate": 4.577762216926581e-06, "loss": 0.4573, "step": 9055 }, { "epoch": 2.2920779549481143, "grad_norm": 0.1805688440799713, "learning_rate": 4.57575057956717e-06, "loss": 0.4536, "step": 9056 }, { "epoch": 2.2923310554290053, "grad_norm": 0.18168511986732483, "learning_rate": 4.573739253168476e-06, "loss": 0.4452, "step": 9057 }, { "epoch": 2.2925841559098963, "grad_norm": 0.18148237466812134, "learning_rate": 4.571728237845795e-06, "loss": 0.4738, "step": 9058 }, { "epoch": 2.2928372563907873, "grad_norm": 0.17839911580085754, "learning_rate": 4.56971753371442e-06, "loss": 0.4372, "step": 9059 }, { "epoch": 2.2930903568716783, "grad_norm": 0.1851043403148651, "learning_rate": 4.5677071408896205e-06, "loss": 0.4246, "step": 9060 }, { "epoch": 2.293343457352569, "grad_norm": 0.17563055455684662, "learning_rate": 4.565697059486643e-06, "loss": 0.429, "step": 9061 }, { "epoch": 2.29359655783346, "grad_norm": 0.18199588358402252, "learning_rate": 4.563687289620734e-06, "loss": 0.4392, "step": 9062 }, { "epoch": 2.293849658314351, "grad_norm": 0.17857129871845245, "learning_rate": 4.561677831407096e-06, "loss": 0.4492, "step": 9063 }, { "epoch": 2.294102758795242, "grad_norm": 0.1861557513475418, "learning_rate": 4.559668684960933e-06, "loss": 0.4264, "step": 9064 }, { "epoch": 2.2943558592761324, "grad_norm": 0.19528473913669586, "learning_rate": 4.557659850397423e-06, "loss": 0.45, "step": 9065 }, { "epoch": 2.2946089597570234, "grad_norm": 0.18134376406669617, "learning_rate": 4.555651327831729e-06, "loss": 0.4338, "step": 9066 }, { "epoch": 2.2948620602379144, "grad_norm": 0.1816510707139969, "learning_rate": 4.553643117378993e-06, "loss": 0.4704, "step": 9067 }, { "epoch": 2.2951151607188054, "grad_norm": 0.1861078143119812, "learning_rate": 4.551635219154351e-06, "loss": 0.4466, "step": 9068 }, { "epoch": 2.2953682611996964, "grad_norm": 0.28124335408210754, "learning_rate": 4.5496276332729015e-06, "loss": 0.4553, "step": 9069 }, { "epoch": 2.2956213616805874, "grad_norm": 0.18027129769325256, "learning_rate": 4.5476203598497385e-06, "loss": 0.4691, "step": 9070 }, { "epoch": 2.295874462161478, "grad_norm": 0.18507058918476105, "learning_rate": 4.545613398999933e-06, "loss": 0.4398, "step": 9071 }, { "epoch": 2.296127562642369, "grad_norm": 0.18511444330215454, "learning_rate": 4.543606750838542e-06, "loss": 0.4537, "step": 9072 }, { "epoch": 2.29638066312326, "grad_norm": 0.17672760784626007, "learning_rate": 4.541600415480602e-06, "loss": 0.4519, "step": 9073 }, { "epoch": 2.296633763604151, "grad_norm": 0.17964649200439453, "learning_rate": 4.53959439304113e-06, "loss": 0.479, "step": 9074 }, { "epoch": 2.296886864085042, "grad_norm": 0.17436207830905914, "learning_rate": 4.53758868363513e-06, "loss": 0.4223, "step": 9075 }, { "epoch": 2.2971399645659325, "grad_norm": 0.17844967544078827, "learning_rate": 4.535583287377583e-06, "loss": 0.4327, "step": 9076 }, { "epoch": 2.2973930650468235, "grad_norm": 0.18145307898521423, "learning_rate": 4.5335782043834554e-06, "loss": 0.4521, "step": 9077 }, { "epoch": 2.2976461655277145, "grad_norm": 0.18486566841602325, "learning_rate": 4.531573434767693e-06, "loss": 0.4419, "step": 9078 }, { "epoch": 2.2978992660086055, "grad_norm": 0.1811509132385254, "learning_rate": 4.529568978645227e-06, "loss": 0.459, "step": 9079 }, { "epoch": 2.2981523664894965, "grad_norm": 0.1956733614206314, "learning_rate": 4.527564836130967e-06, "loss": 0.4434, "step": 9080 }, { "epoch": 2.298405466970387, "grad_norm": 0.18635638058185577, "learning_rate": 4.525561007339805e-06, "loss": 0.4761, "step": 9081 }, { "epoch": 2.298658567451278, "grad_norm": 0.20793300867080688, "learning_rate": 4.523557492386619e-06, "loss": 0.4553, "step": 9082 }, { "epoch": 2.298911667932169, "grad_norm": 0.19307458400726318, "learning_rate": 4.521554291386265e-06, "loss": 0.452, "step": 9083 }, { "epoch": 2.29916476841306, "grad_norm": 0.17791177332401276, "learning_rate": 4.519551404453586e-06, "loss": 0.4525, "step": 9084 }, { "epoch": 2.299417868893951, "grad_norm": 0.18157261610031128, "learning_rate": 4.517548831703392e-06, "loss": 0.4607, "step": 9085 }, { "epoch": 2.2996709693748416, "grad_norm": 0.17536233365535736, "learning_rate": 4.515546573250499e-06, "loss": 0.4452, "step": 9086 }, { "epoch": 2.2999240698557326, "grad_norm": 0.18024981021881104, "learning_rate": 4.513544629209685e-06, "loss": 0.4217, "step": 9087 }, { "epoch": 2.3001771703366236, "grad_norm": 0.18602637946605682, "learning_rate": 4.51154299969572e-06, "loss": 0.4588, "step": 9088 }, { "epoch": 2.3004302708175146, "grad_norm": 0.18083621561527252, "learning_rate": 4.509541684823353e-06, "loss": 0.4595, "step": 9089 }, { "epoch": 2.3006833712984056, "grad_norm": 0.1973213255405426, "learning_rate": 4.5075406847073185e-06, "loss": 0.4654, "step": 9090 }, { "epoch": 2.3009364717792966, "grad_norm": 0.18835115432739258, "learning_rate": 4.505539999462322e-06, "loss": 0.4681, "step": 9091 }, { "epoch": 2.301189572260187, "grad_norm": 0.18505744636058807, "learning_rate": 4.503539629203057e-06, "loss": 0.4645, "step": 9092 }, { "epoch": 2.301442672741078, "grad_norm": 0.1872905045747757, "learning_rate": 4.5015395740442115e-06, "loss": 0.4523, "step": 9093 }, { "epoch": 2.301695773221969, "grad_norm": 0.18851947784423828, "learning_rate": 4.499539834100437e-06, "loss": 0.4775, "step": 9094 }, { "epoch": 2.30194887370286, "grad_norm": 0.18175001442432404, "learning_rate": 4.497540409486377e-06, "loss": 0.4506, "step": 9095 }, { "epoch": 2.3022019741837507, "grad_norm": 0.1830260306596756, "learning_rate": 4.495541300316657e-06, "loss": 0.4504, "step": 9096 }, { "epoch": 2.3024550746646417, "grad_norm": 0.17838487029075623, "learning_rate": 4.493542506705873e-06, "loss": 0.4354, "step": 9097 }, { "epoch": 2.3027081751455327, "grad_norm": 0.18488052487373352, "learning_rate": 4.491544028768614e-06, "loss": 0.4572, "step": 9098 }, { "epoch": 2.3029612756264237, "grad_norm": 0.18602508306503296, "learning_rate": 4.489545866619455e-06, "loss": 0.4664, "step": 9099 }, { "epoch": 2.3032143761073147, "grad_norm": 0.18897786736488342, "learning_rate": 4.487548020372942e-06, "loss": 0.4493, "step": 9100 }, { "epoch": 2.3034674765882057, "grad_norm": 0.17734768986701965, "learning_rate": 4.485550490143611e-06, "loss": 0.4268, "step": 9101 }, { "epoch": 2.3037205770690963, "grad_norm": 0.17680460214614868, "learning_rate": 4.48355327604597e-06, "loss": 0.4431, "step": 9102 }, { "epoch": 2.3039736775499873, "grad_norm": 0.18252871930599213, "learning_rate": 4.481556378194517e-06, "loss": 0.4598, "step": 9103 }, { "epoch": 2.3042267780308783, "grad_norm": 0.18419629335403442, "learning_rate": 4.479559796703732e-06, "loss": 0.4605, "step": 9104 }, { "epoch": 2.3044798785117693, "grad_norm": 0.18093298375606537, "learning_rate": 4.4775635316880695e-06, "loss": 0.4436, "step": 9105 }, { "epoch": 2.3047329789926603, "grad_norm": 0.18489067256450653, "learning_rate": 4.475567583261979e-06, "loss": 0.4816, "step": 9106 }, { "epoch": 2.304986079473551, "grad_norm": 0.18434369564056396, "learning_rate": 4.473571951539884e-06, "loss": 0.4736, "step": 9107 }, { "epoch": 2.305239179954442, "grad_norm": 0.18094298243522644, "learning_rate": 4.471576636636181e-06, "loss": 0.4445, "step": 9108 }, { "epoch": 2.305492280435333, "grad_norm": 0.18627938628196716, "learning_rate": 4.469581638665265e-06, "loss": 0.4457, "step": 9109 }, { "epoch": 2.305745380916224, "grad_norm": 0.1794365793466568, "learning_rate": 4.4675869577415e-06, "loss": 0.4542, "step": 9110 }, { "epoch": 2.305998481397115, "grad_norm": 0.1822260022163391, "learning_rate": 4.465592593979237e-06, "loss": 0.4615, "step": 9111 }, { "epoch": 2.3062515818780054, "grad_norm": 0.17837099730968475, "learning_rate": 4.463598547492817e-06, "loss": 0.4525, "step": 9112 }, { "epoch": 2.3065046823588964, "grad_norm": 0.18315427005290985, "learning_rate": 4.461604818396545e-06, "loss": 0.4439, "step": 9113 }, { "epoch": 2.3067577828397874, "grad_norm": 0.21185173094272614, "learning_rate": 4.45961140680472e-06, "loss": 0.4458, "step": 9114 }, { "epoch": 2.3070108833206784, "grad_norm": 0.18520496785640717, "learning_rate": 4.457618312831622e-06, "loss": 0.4472, "step": 9115 }, { "epoch": 2.3072639838015694, "grad_norm": 0.18160095810890198, "learning_rate": 4.455625536591508e-06, "loss": 0.4334, "step": 9116 }, { "epoch": 2.30751708428246, "grad_norm": 0.1923055499792099, "learning_rate": 4.4536330781986215e-06, "loss": 0.4472, "step": 9117 }, { "epoch": 2.307770184763351, "grad_norm": 0.17932575941085815, "learning_rate": 4.451640937767185e-06, "loss": 0.4517, "step": 9118 }, { "epoch": 2.308023285244242, "grad_norm": 0.17420744895935059, "learning_rate": 4.449649115411404e-06, "loss": 0.4147, "step": 9119 }, { "epoch": 2.308276385725133, "grad_norm": 0.1751914918422699, "learning_rate": 4.447657611245466e-06, "loss": 0.4542, "step": 9120 }, { "epoch": 2.308529486206024, "grad_norm": 0.2825241684913635, "learning_rate": 4.44566642538354e-06, "loss": 0.4455, "step": 9121 }, { "epoch": 2.308782586686915, "grad_norm": 0.1794583648443222, "learning_rate": 4.443675557939775e-06, "loss": 0.4467, "step": 9122 }, { "epoch": 2.3090356871678055, "grad_norm": 0.18420936167240143, "learning_rate": 4.441685009028304e-06, "loss": 0.454, "step": 9123 }, { "epoch": 2.3092887876486965, "grad_norm": 0.17942914366722107, "learning_rate": 4.4396947787632406e-06, "loss": 0.445, "step": 9124 }, { "epoch": 2.3095418881295875, "grad_norm": 0.1909375637769699, "learning_rate": 4.437704867258681e-06, "loss": 0.4366, "step": 9125 }, { "epoch": 2.3097949886104785, "grad_norm": 0.17840373516082764, "learning_rate": 4.4357152746287024e-06, "loss": 0.4615, "step": 9126 }, { "epoch": 2.310048089091369, "grad_norm": 0.18519175052642822, "learning_rate": 4.433726000987364e-06, "loss": 0.4623, "step": 9127 }, { "epoch": 2.31030118957226, "grad_norm": 0.17800873517990112, "learning_rate": 4.431737046448707e-06, "loss": 0.4824, "step": 9128 }, { "epoch": 2.310554290053151, "grad_norm": 0.181775763630867, "learning_rate": 4.429748411126753e-06, "loss": 0.4789, "step": 9129 }, { "epoch": 2.310807390534042, "grad_norm": 0.18676526844501495, "learning_rate": 4.4277600951355074e-06, "loss": 0.4817, "step": 9130 }, { "epoch": 2.311060491014933, "grad_norm": 0.18042606115341187, "learning_rate": 4.425772098588954e-06, "loss": 0.4413, "step": 9131 }, { "epoch": 2.311313591495824, "grad_norm": 0.1949497014284134, "learning_rate": 4.423784421601063e-06, "loss": 0.4507, "step": 9132 }, { "epoch": 2.3115666919767146, "grad_norm": 0.17893549799919128, "learning_rate": 4.421797064285782e-06, "loss": 0.4721, "step": 9133 }, { "epoch": 2.3118197924576056, "grad_norm": 0.18263216316699982, "learning_rate": 4.419810026757047e-06, "loss": 0.4384, "step": 9134 }, { "epoch": 2.3120728929384966, "grad_norm": 0.18298441171646118, "learning_rate": 4.417823309128762e-06, "loss": 0.4631, "step": 9135 }, { "epoch": 2.3123259934193876, "grad_norm": 0.18080675601959229, "learning_rate": 4.4158369115148214e-06, "loss": 0.4535, "step": 9136 }, { "epoch": 2.3125790939002786, "grad_norm": 0.18442565202713013, "learning_rate": 4.4138508340291095e-06, "loss": 0.4513, "step": 9137 }, { "epoch": 2.312832194381169, "grad_norm": 0.17945663630962372, "learning_rate": 4.41186507678548e-06, "loss": 0.477, "step": 9138 }, { "epoch": 2.31308529486206, "grad_norm": 0.1813688576221466, "learning_rate": 4.409879639897771e-06, "loss": 0.4687, "step": 9139 }, { "epoch": 2.313338395342951, "grad_norm": 0.1849457174539566, "learning_rate": 4.407894523479809e-06, "loss": 0.4449, "step": 9140 }, { "epoch": 2.313591495823842, "grad_norm": 0.17938169836997986, "learning_rate": 4.405909727645388e-06, "loss": 0.4556, "step": 9141 }, { "epoch": 2.313844596304733, "grad_norm": 0.19736087322235107, "learning_rate": 4.403925252508291e-06, "loss": 0.4159, "step": 9142 }, { "epoch": 2.3140976967856237, "grad_norm": 0.18390554189682007, "learning_rate": 4.401941098182293e-06, "loss": 0.4387, "step": 9143 }, { "epoch": 2.3143507972665147, "grad_norm": 0.19505751132965088, "learning_rate": 4.399957264781137e-06, "loss": 0.4448, "step": 9144 }, { "epoch": 2.3146038977474057, "grad_norm": 0.1848755031824112, "learning_rate": 4.397973752418556e-06, "loss": 0.4513, "step": 9145 }, { "epoch": 2.3148569982282967, "grad_norm": 0.1847560852766037, "learning_rate": 4.395990561208253e-06, "loss": 0.4384, "step": 9146 }, { "epoch": 2.3151100987091877, "grad_norm": 0.29186806082725525, "learning_rate": 4.394007691263923e-06, "loss": 0.4818, "step": 9147 }, { "epoch": 2.3153631991900783, "grad_norm": 0.18669141829013824, "learning_rate": 4.392025142699241e-06, "loss": 0.4493, "step": 9148 }, { "epoch": 2.3156162996709693, "grad_norm": 0.18882685899734497, "learning_rate": 4.390042915627857e-06, "loss": 0.4644, "step": 9149 }, { "epoch": 2.3158694001518603, "grad_norm": 0.18072518706321716, "learning_rate": 4.3880610101634166e-06, "loss": 0.4322, "step": 9150 }, { "epoch": 2.3161225006327513, "grad_norm": 0.18533532321453094, "learning_rate": 4.386079426419539e-06, "loss": 0.4508, "step": 9151 }, { "epoch": 2.3163756011136423, "grad_norm": 0.1782267987728119, "learning_rate": 4.3840981645098145e-06, "loss": 0.4295, "step": 9152 }, { "epoch": 2.3166287015945333, "grad_norm": 0.1784624308347702, "learning_rate": 4.38211722454783e-06, "loss": 0.4716, "step": 9153 }, { "epoch": 2.316881802075424, "grad_norm": 0.17839987576007843, "learning_rate": 4.380136606647148e-06, "loss": 0.4577, "step": 9154 }, { "epoch": 2.317134902556315, "grad_norm": 0.18541322648525238, "learning_rate": 4.37815631092131e-06, "loss": 0.4558, "step": 9155 }, { "epoch": 2.317388003037206, "grad_norm": 0.18342378735542297, "learning_rate": 4.376176337483852e-06, "loss": 0.4719, "step": 9156 }, { "epoch": 2.317641103518097, "grad_norm": 0.2157626897096634, "learning_rate": 4.374196686448272e-06, "loss": 0.4566, "step": 9157 }, { "epoch": 2.3178942039989874, "grad_norm": 0.17338557541370392, "learning_rate": 4.372217357928061e-06, "loss": 0.4232, "step": 9158 }, { "epoch": 2.3181473044798784, "grad_norm": 0.1882164031267166, "learning_rate": 4.370238352036693e-06, "loss": 0.4598, "step": 9159 }, { "epoch": 2.3184004049607694, "grad_norm": 0.1796370893716812, "learning_rate": 4.368259668887617e-06, "loss": 0.4314, "step": 9160 }, { "epoch": 2.3186535054416604, "grad_norm": 0.18330878019332886, "learning_rate": 4.366281308594268e-06, "loss": 0.4252, "step": 9161 }, { "epoch": 2.3189066059225514, "grad_norm": 0.18313869833946228, "learning_rate": 4.364303271270061e-06, "loss": 0.4627, "step": 9162 }, { "epoch": 2.3191597064034424, "grad_norm": 0.17516787350177765, "learning_rate": 4.3623255570283925e-06, "loss": 0.4094, "step": 9163 }, { "epoch": 2.319412806884333, "grad_norm": 0.1852860301733017, "learning_rate": 4.360348165982642e-06, "loss": 0.4411, "step": 9164 }, { "epoch": 2.319665907365224, "grad_norm": 0.1824382096529007, "learning_rate": 4.358371098246167e-06, "loss": 0.4372, "step": 9165 }, { "epoch": 2.319919007846115, "grad_norm": 0.19077368080615997, "learning_rate": 4.35639435393231e-06, "loss": 0.4324, "step": 9166 }, { "epoch": 2.320172108327006, "grad_norm": 0.18808726966381073, "learning_rate": 4.354417933154394e-06, "loss": 0.4642, "step": 9167 }, { "epoch": 2.3204252088078965, "grad_norm": 0.18232998251914978, "learning_rate": 4.352441836025722e-06, "loss": 0.4512, "step": 9168 }, { "epoch": 2.3206783092887875, "grad_norm": 0.19328515231609344, "learning_rate": 4.350466062659579e-06, "loss": 0.4558, "step": 9169 }, { "epoch": 2.3209314097696785, "grad_norm": 0.18167375028133392, "learning_rate": 4.348490613169232e-06, "loss": 0.4418, "step": 9170 }, { "epoch": 2.3211845102505695, "grad_norm": 0.1774705946445465, "learning_rate": 4.34651548766793e-06, "loss": 0.4522, "step": 9171 }, { "epoch": 2.3214376107314605, "grad_norm": 0.1822841763496399, "learning_rate": 4.344540686268904e-06, "loss": 0.4287, "step": 9172 }, { "epoch": 2.3216907112123515, "grad_norm": 0.17998553812503815, "learning_rate": 4.342566209085363e-06, "loss": 0.4536, "step": 9173 }, { "epoch": 2.321943811693242, "grad_norm": 0.19539238512516022, "learning_rate": 4.340592056230502e-06, "loss": 0.4488, "step": 9174 }, { "epoch": 2.322196912174133, "grad_norm": 0.17974917590618134, "learning_rate": 4.338618227817491e-06, "loss": 0.4569, "step": 9175 }, { "epoch": 2.322450012655024, "grad_norm": 0.18453632295131683, "learning_rate": 4.33664472395949e-06, "loss": 0.4698, "step": 9176 }, { "epoch": 2.322703113135915, "grad_norm": 0.17409759759902954, "learning_rate": 4.334671544769632e-06, "loss": 0.4394, "step": 9177 }, { "epoch": 2.322956213616806, "grad_norm": 0.18337997794151306, "learning_rate": 4.332698690361038e-06, "loss": 0.439, "step": 9178 }, { "epoch": 2.3232093140976966, "grad_norm": 0.1814962923526764, "learning_rate": 4.330726160846808e-06, "loss": 0.4199, "step": 9179 }, { "epoch": 2.3234624145785876, "grad_norm": 0.18114835023880005, "learning_rate": 4.328753956340016e-06, "loss": 0.43, "step": 9180 }, { "epoch": 2.3237155150594786, "grad_norm": 0.18566283583641052, "learning_rate": 4.3267820769537324e-06, "loss": 0.4296, "step": 9181 }, { "epoch": 2.3239686155403696, "grad_norm": 0.1766074299812317, "learning_rate": 4.324810522801e-06, "loss": 0.4689, "step": 9182 }, { "epoch": 2.3242217160212606, "grad_norm": 0.18068820238113403, "learning_rate": 4.322839293994839e-06, "loss": 0.4436, "step": 9183 }, { "epoch": 2.3244748165021516, "grad_norm": 0.18578772246837616, "learning_rate": 4.320868390648264e-06, "loss": 0.4471, "step": 9184 }, { "epoch": 2.324727916983042, "grad_norm": 0.17756418883800507, "learning_rate": 4.318897812874253e-06, "loss": 0.4451, "step": 9185 }, { "epoch": 2.324981017463933, "grad_norm": 0.18315505981445312, "learning_rate": 4.3169275607857765e-06, "loss": 0.4475, "step": 9186 }, { "epoch": 2.325234117944824, "grad_norm": 0.1785789579153061, "learning_rate": 4.31495763449579e-06, "loss": 0.4448, "step": 9187 }, { "epoch": 2.325487218425715, "grad_norm": 0.20391668379306793, "learning_rate": 4.312988034117224e-06, "loss": 0.4582, "step": 9188 }, { "epoch": 2.3257403189066057, "grad_norm": 0.18377217650413513, "learning_rate": 4.31101875976299e-06, "loss": 0.4639, "step": 9189 }, { "epoch": 2.3259934193874967, "grad_norm": 0.17392490804195404, "learning_rate": 4.3090498115459865e-06, "loss": 0.4357, "step": 9190 }, { "epoch": 2.3262465198683877, "grad_norm": 0.1858973354101181, "learning_rate": 4.307081189579082e-06, "loss": 0.461, "step": 9191 }, { "epoch": 2.3264996203492787, "grad_norm": 0.18723344802856445, "learning_rate": 4.305112893975137e-06, "loss": 0.4505, "step": 9192 }, { "epoch": 2.3267527208301697, "grad_norm": 0.18332475423812866, "learning_rate": 4.303144924846986e-06, "loss": 0.4616, "step": 9193 }, { "epoch": 2.3270058213110607, "grad_norm": 0.18580445647239685, "learning_rate": 4.301177282307455e-06, "loss": 0.4599, "step": 9194 }, { "epoch": 2.3272589217919513, "grad_norm": 0.17761172354221344, "learning_rate": 4.299209966469346e-06, "loss": 0.4351, "step": 9195 }, { "epoch": 2.3275120222728423, "grad_norm": 0.18789583444595337, "learning_rate": 4.297242977445433e-06, "loss": 0.447, "step": 9196 }, { "epoch": 2.3277651227537333, "grad_norm": 0.18426857888698578, "learning_rate": 4.295276315348484e-06, "loss": 0.4353, "step": 9197 }, { "epoch": 2.3280182232346243, "grad_norm": 0.17842070758342743, "learning_rate": 4.2933099802912415e-06, "loss": 0.4471, "step": 9198 }, { "epoch": 2.328271323715515, "grad_norm": 0.18205711245536804, "learning_rate": 4.291343972386429e-06, "loss": 0.4547, "step": 9199 }, { "epoch": 2.328524424196406, "grad_norm": 0.1821907013654709, "learning_rate": 4.2893782917467604e-06, "loss": 0.4529, "step": 9200 }, { "epoch": 2.328777524677297, "grad_norm": 0.181947261095047, "learning_rate": 4.287412938484926e-06, "loss": 0.4652, "step": 9201 }, { "epoch": 2.329030625158188, "grad_norm": 0.17704236507415771, "learning_rate": 4.285447912713585e-06, "loss": 0.4625, "step": 9202 }, { "epoch": 2.329283725639079, "grad_norm": 0.18477018177509308, "learning_rate": 4.283483214545394e-06, "loss": 0.4472, "step": 9203 }, { "epoch": 2.32953682611997, "grad_norm": 0.18592004477977753, "learning_rate": 4.2815188440929845e-06, "loss": 0.4525, "step": 9204 }, { "epoch": 2.3297899266008604, "grad_norm": 0.1813896745443344, "learning_rate": 4.279554801468968e-06, "loss": 0.4512, "step": 9205 }, { "epoch": 2.3300430270817514, "grad_norm": 0.18469356000423431, "learning_rate": 4.277591086785942e-06, "loss": 0.4525, "step": 9206 }, { "epoch": 2.3302961275626424, "grad_norm": 0.18125884234905243, "learning_rate": 4.275627700156479e-06, "loss": 0.4517, "step": 9207 }, { "epoch": 2.3305492280435334, "grad_norm": 0.18172310292720795, "learning_rate": 4.2736646416931374e-06, "loss": 0.4517, "step": 9208 }, { "epoch": 2.3308023285244244, "grad_norm": 0.2193526178598404, "learning_rate": 4.271701911508454e-06, "loss": 0.4755, "step": 9209 }, { "epoch": 2.331055429005315, "grad_norm": 0.18110588192939758, "learning_rate": 4.269739509714951e-06, "loss": 0.4647, "step": 9210 }, { "epoch": 2.331308529486206, "grad_norm": 0.1859745979309082, "learning_rate": 4.267777436425126e-06, "loss": 0.4474, "step": 9211 }, { "epoch": 2.331561629967097, "grad_norm": 0.21572016179561615, "learning_rate": 4.265815691751461e-06, "loss": 0.4447, "step": 9212 }, { "epoch": 2.331814730447988, "grad_norm": 0.17839615046977997, "learning_rate": 4.263854275806418e-06, "loss": 0.4512, "step": 9213 }, { "epoch": 2.332067830928879, "grad_norm": 0.18287920951843262, "learning_rate": 4.261893188702442e-06, "loss": 0.4449, "step": 9214 }, { "epoch": 2.33232093140977, "grad_norm": 0.1801561564207077, "learning_rate": 4.259932430551959e-06, "loss": 0.4602, "step": 9215 }, { "epoch": 2.3325740318906605, "grad_norm": 0.1880442500114441, "learning_rate": 4.257972001467371e-06, "loss": 0.4606, "step": 9216 }, { "epoch": 2.3328271323715515, "grad_norm": 0.17996898293495178, "learning_rate": 4.25601190156107e-06, "loss": 0.439, "step": 9217 }, { "epoch": 2.3330802328524425, "grad_norm": 0.18411464989185333, "learning_rate": 4.254052130945422e-06, "loss": 0.4616, "step": 9218 }, { "epoch": 2.3333333333333335, "grad_norm": 0.1788521707057953, "learning_rate": 4.252092689732776e-06, "loss": 0.4527, "step": 9219 }, { "epoch": 2.333586433814224, "grad_norm": 0.19681416451931, "learning_rate": 4.250133578035463e-06, "loss": 0.4498, "step": 9220 }, { "epoch": 2.333839534295115, "grad_norm": 0.1878724843263626, "learning_rate": 4.248174795965797e-06, "loss": 0.4384, "step": 9221 }, { "epoch": 2.334092634776006, "grad_norm": 0.1790315806865692, "learning_rate": 4.246216343636067e-06, "loss": 0.4611, "step": 9222 }, { "epoch": 2.334345735256897, "grad_norm": 0.18589313328266144, "learning_rate": 4.244258221158554e-06, "loss": 0.4587, "step": 9223 }, { "epoch": 2.334598835737788, "grad_norm": 0.18265344202518463, "learning_rate": 4.2423004286454996e-06, "loss": 0.4241, "step": 9224 }, { "epoch": 2.334851936218679, "grad_norm": 0.1813817024230957, "learning_rate": 4.240342966209152e-06, "loss": 0.4548, "step": 9225 }, { "epoch": 2.3351050366995696, "grad_norm": 0.19447195529937744, "learning_rate": 4.238385833961724e-06, "loss": 0.4407, "step": 9226 }, { "epoch": 2.3353581371804606, "grad_norm": 0.18468263745307922, "learning_rate": 4.236429032015415e-06, "loss": 0.4373, "step": 9227 }, { "epoch": 2.3356112376613516, "grad_norm": 0.18076756596565247, "learning_rate": 4.234472560482404e-06, "loss": 0.4472, "step": 9228 }, { "epoch": 2.3358643381422426, "grad_norm": 0.18242862820625305, "learning_rate": 4.232516419474854e-06, "loss": 0.4415, "step": 9229 }, { "epoch": 2.336117438623133, "grad_norm": 0.1820847988128662, "learning_rate": 4.230560609104897e-06, "loss": 0.4418, "step": 9230 }, { "epoch": 2.336370539104024, "grad_norm": 0.18568791449069977, "learning_rate": 4.228605129484665e-06, "loss": 0.4334, "step": 9231 }, { "epoch": 2.336623639584915, "grad_norm": 0.17833024263381958, "learning_rate": 4.226649980726259e-06, "loss": 0.4463, "step": 9232 }, { "epoch": 2.336876740065806, "grad_norm": 0.17838428914546967, "learning_rate": 4.224695162941762e-06, "loss": 0.4399, "step": 9233 }, { "epoch": 2.337129840546697, "grad_norm": 0.18218493461608887, "learning_rate": 4.222740676243244e-06, "loss": 0.461, "step": 9234 }, { "epoch": 2.337382941027588, "grad_norm": 0.18332430720329285, "learning_rate": 4.220786520742744e-06, "loss": 0.456, "step": 9235 }, { "epoch": 2.3376360415084787, "grad_norm": 0.1862037628889084, "learning_rate": 4.218832696552295e-06, "loss": 0.4647, "step": 9236 }, { "epoch": 2.3378891419893697, "grad_norm": 0.1818566620349884, "learning_rate": 4.2168792037839005e-06, "loss": 0.4651, "step": 9237 }, { "epoch": 2.3381422424702607, "grad_norm": 0.18472924828529358, "learning_rate": 4.214926042549556e-06, "loss": 0.4472, "step": 9238 }, { "epoch": 2.3383953429511517, "grad_norm": 0.17733405530452728, "learning_rate": 4.2129732129612324e-06, "loss": 0.4404, "step": 9239 }, { "epoch": 2.3386484434320427, "grad_norm": 0.20872239768505096, "learning_rate": 4.21102071513088e-06, "loss": 0.4221, "step": 9240 }, { "epoch": 2.3389015439129333, "grad_norm": 0.1852167695760727, "learning_rate": 4.209068549170428e-06, "loss": 0.484, "step": 9241 }, { "epoch": 2.3391546443938243, "grad_norm": 0.18041343986988068, "learning_rate": 4.207116715191792e-06, "loss": 0.4402, "step": 9242 }, { "epoch": 2.3394077448747153, "grad_norm": 0.18581807613372803, "learning_rate": 4.205165213306866e-06, "loss": 0.4548, "step": 9243 }, { "epoch": 2.3396608453556063, "grad_norm": 0.18658339977264404, "learning_rate": 4.203214043627523e-06, "loss": 0.472, "step": 9244 }, { "epoch": 2.3399139458364973, "grad_norm": 0.18154416978359222, "learning_rate": 4.201263206265629e-06, "loss": 0.4526, "step": 9245 }, { "epoch": 2.340167046317388, "grad_norm": 0.182246595621109, "learning_rate": 4.199312701333012e-06, "loss": 0.4429, "step": 9246 }, { "epoch": 2.340420146798279, "grad_norm": 0.1808202713727951, "learning_rate": 4.1973625289414935e-06, "loss": 0.4606, "step": 9247 }, { "epoch": 2.34067324727917, "grad_norm": 0.18056809902191162, "learning_rate": 4.1954126892028715e-06, "loss": 0.4593, "step": 9248 }, { "epoch": 2.340926347760061, "grad_norm": 0.18823830783367157, "learning_rate": 4.193463182228927e-06, "loss": 0.4458, "step": 9249 }, { "epoch": 2.341179448240952, "grad_norm": 0.17508620023727417, "learning_rate": 4.191514008131418e-06, "loss": 0.4392, "step": 9250 }, { "epoch": 2.3414325487218424, "grad_norm": 0.1886207014322281, "learning_rate": 4.1895651670220974e-06, "loss": 0.4708, "step": 9251 }, { "epoch": 2.3416856492027334, "grad_norm": 0.17978237569332123, "learning_rate": 4.187616659012676e-06, "loss": 0.4443, "step": 9252 }, { "epoch": 2.3419387496836244, "grad_norm": 0.17982828617095947, "learning_rate": 4.185668484214862e-06, "loss": 0.4768, "step": 9253 }, { "epoch": 2.3421918501645154, "grad_norm": 0.18375113606452942, "learning_rate": 4.18372064274034e-06, "loss": 0.4582, "step": 9254 }, { "epoch": 2.3424449506454064, "grad_norm": 0.1764940470457077, "learning_rate": 4.181773134700776e-06, "loss": 0.4464, "step": 9255 }, { "epoch": 2.3426980511262974, "grad_norm": 0.22169966995716095, "learning_rate": 4.179825960207817e-06, "loss": 0.445, "step": 9256 }, { "epoch": 2.342951151607188, "grad_norm": 0.179849773645401, "learning_rate": 4.17787911937309e-06, "loss": 0.4738, "step": 9257 }, { "epoch": 2.343204252088079, "grad_norm": 0.20402124524116516, "learning_rate": 4.1759326123082025e-06, "loss": 0.4808, "step": 9258 }, { "epoch": 2.34345735256897, "grad_norm": 0.18401730060577393, "learning_rate": 4.173986439124743e-06, "loss": 0.459, "step": 9259 }, { "epoch": 2.343710453049861, "grad_norm": 0.17992638051509857, "learning_rate": 4.172040599934284e-06, "loss": 0.4475, "step": 9260 }, { "epoch": 2.3439635535307515, "grad_norm": 0.18725278973579407, "learning_rate": 4.170095094848374e-06, "loss": 0.4439, "step": 9261 }, { "epoch": 2.3442166540116425, "grad_norm": 0.17989790439605713, "learning_rate": 4.168149923978546e-06, "loss": 0.4404, "step": 9262 }, { "epoch": 2.3444697544925335, "grad_norm": 0.19437578320503235, "learning_rate": 4.166205087436311e-06, "loss": 0.4553, "step": 9263 }, { "epoch": 2.3447228549734245, "grad_norm": 0.1789783239364624, "learning_rate": 4.164260585333165e-06, "loss": 0.4813, "step": 9264 }, { "epoch": 2.3449759554543155, "grad_norm": 0.17858460545539856, "learning_rate": 4.1623164177805795e-06, "loss": 0.4426, "step": 9265 }, { "epoch": 2.3452290559352065, "grad_norm": 0.18327884376049042, "learning_rate": 4.160372584890012e-06, "loss": 0.4396, "step": 9266 }, { "epoch": 2.345482156416097, "grad_norm": 0.18070414662361145, "learning_rate": 4.1584290867728995e-06, "loss": 0.4555, "step": 9267 }, { "epoch": 2.345735256896988, "grad_norm": 0.18866772949695587, "learning_rate": 4.156485923540648e-06, "loss": 0.4391, "step": 9268 }, { "epoch": 2.345988357377879, "grad_norm": 0.18228036165237427, "learning_rate": 4.154543095304668e-06, "loss": 0.43, "step": 9269 }, { "epoch": 2.34624145785877, "grad_norm": 0.18147878348827362, "learning_rate": 4.152600602176333e-06, "loss": 0.4326, "step": 9270 }, { "epoch": 2.346494558339661, "grad_norm": 0.18637894093990326, "learning_rate": 4.150658444267e-06, "loss": 0.4656, "step": 9271 }, { "epoch": 2.3467476588205516, "grad_norm": 0.18699944019317627, "learning_rate": 4.1487166216880125e-06, "loss": 0.441, "step": 9272 }, { "epoch": 2.3470007593014426, "grad_norm": 0.1832410991191864, "learning_rate": 4.146775134550691e-06, "loss": 0.444, "step": 9273 }, { "epoch": 2.3472538597823336, "grad_norm": 0.17734718322753906, "learning_rate": 4.144833982966333e-06, "loss": 0.4235, "step": 9274 }, { "epoch": 2.3475069602632246, "grad_norm": 0.17934389412403107, "learning_rate": 4.142893167046218e-06, "loss": 0.4408, "step": 9275 }, { "epoch": 2.3477600607441156, "grad_norm": 0.18402838706970215, "learning_rate": 4.140952686901617e-06, "loss": 0.4606, "step": 9276 }, { "epoch": 2.348013161225006, "grad_norm": 0.1790277659893036, "learning_rate": 4.13901254264377e-06, "loss": 0.4712, "step": 9277 }, { "epoch": 2.348266261705897, "grad_norm": 0.18120789527893066, "learning_rate": 4.137072734383904e-06, "loss": 0.4559, "step": 9278 }, { "epoch": 2.348519362186788, "grad_norm": 0.18471874296665192, "learning_rate": 4.13513326223322e-06, "loss": 0.471, "step": 9279 }, { "epoch": 2.348772462667679, "grad_norm": 0.18540959060192108, "learning_rate": 4.133194126302903e-06, "loss": 0.4591, "step": 9280 }, { "epoch": 2.34902556314857, "grad_norm": 0.18599949777126312, "learning_rate": 4.13125532670412e-06, "loss": 0.441, "step": 9281 }, { "epoch": 2.3492786636294607, "grad_norm": 0.18282495439052582, "learning_rate": 4.129316863548023e-06, "loss": 0.461, "step": 9282 }, { "epoch": 2.3495317641103517, "grad_norm": 0.18289878964424133, "learning_rate": 4.1273787369457364e-06, "loss": 0.4396, "step": 9283 }, { "epoch": 2.3497848645912427, "grad_norm": 0.17891976237297058, "learning_rate": 4.1254409470083745e-06, "loss": 0.4457, "step": 9284 }, { "epoch": 2.3500379650721337, "grad_norm": 0.18540234863758087, "learning_rate": 4.123503493847016e-06, "loss": 0.4801, "step": 9285 }, { "epoch": 2.3502910655530247, "grad_norm": 0.18665023148059845, "learning_rate": 4.121566377572739e-06, "loss": 0.465, "step": 9286 }, { "epoch": 2.3505441660339157, "grad_norm": 0.1779831349849701, "learning_rate": 4.119629598296591e-06, "loss": 0.4517, "step": 9287 }, { "epoch": 2.3507972665148062, "grad_norm": 0.18044012784957886, "learning_rate": 4.117693156129601e-06, "loss": 0.4541, "step": 9288 }, { "epoch": 2.3510503669956972, "grad_norm": 0.18538421392440796, "learning_rate": 4.115757051182789e-06, "loss": 0.4664, "step": 9289 }, { "epoch": 2.3513034674765882, "grad_norm": 0.18742665648460388, "learning_rate": 4.113821283567145e-06, "loss": 0.438, "step": 9290 }, { "epoch": 2.3515565679574792, "grad_norm": 0.22246317565441132, "learning_rate": 4.111885853393638e-06, "loss": 0.454, "step": 9291 }, { "epoch": 2.35180966843837, "grad_norm": 0.17600104212760925, "learning_rate": 4.109950760773226e-06, "loss": 0.4276, "step": 9292 }, { "epoch": 2.352062768919261, "grad_norm": 0.18796291947364807, "learning_rate": 4.108016005816842e-06, "loss": 0.4399, "step": 9293 }, { "epoch": 2.352315869400152, "grad_norm": 0.18747277557849884, "learning_rate": 4.106081588635398e-06, "loss": 0.4558, "step": 9294 }, { "epoch": 2.352568969881043, "grad_norm": 0.18198364973068237, "learning_rate": 4.1041475093398005e-06, "loss": 0.4575, "step": 9295 }, { "epoch": 2.352822070361934, "grad_norm": 0.18306033313274384, "learning_rate": 4.102213768040918e-06, "loss": 0.4593, "step": 9296 }, { "epoch": 2.353075170842825, "grad_norm": 0.1830291897058487, "learning_rate": 4.100280364849608e-06, "loss": 0.4525, "step": 9297 }, { "epoch": 2.3533282713237154, "grad_norm": 0.18600504100322723, "learning_rate": 4.098347299876711e-06, "loss": 0.4302, "step": 9298 }, { "epoch": 2.3535813718046064, "grad_norm": 0.1852569282054901, "learning_rate": 4.096414573233044e-06, "loss": 0.4395, "step": 9299 }, { "epoch": 2.3538344722854974, "grad_norm": 0.17948198318481445, "learning_rate": 4.094482185029407e-06, "loss": 0.4421, "step": 9300 }, { "epoch": 2.3540875727663884, "grad_norm": 0.1812257170677185, "learning_rate": 4.0925501353765795e-06, "loss": 0.4621, "step": 9301 }, { "epoch": 2.3543406732472794, "grad_norm": 0.17733143270015717, "learning_rate": 4.0906184243853206e-06, "loss": 0.4381, "step": 9302 }, { "epoch": 2.35459377372817, "grad_norm": 0.18091584742069244, "learning_rate": 4.088687052166372e-06, "loss": 0.4577, "step": 9303 }, { "epoch": 2.354846874209061, "grad_norm": 0.17858777940273285, "learning_rate": 4.086756018830456e-06, "loss": 0.441, "step": 9304 }, { "epoch": 2.355099974689952, "grad_norm": 0.18666504323482513, "learning_rate": 4.084825324488275e-06, "loss": 0.4609, "step": 9305 }, { "epoch": 2.355353075170843, "grad_norm": 0.18398427963256836, "learning_rate": 4.082894969250509e-06, "loss": 0.4493, "step": 9306 }, { "epoch": 2.355606175651734, "grad_norm": 0.1773661971092224, "learning_rate": 4.080964953227824e-06, "loss": 0.4523, "step": 9307 }, { "epoch": 2.3558592761326245, "grad_norm": 0.18255159258842468, "learning_rate": 4.079035276530862e-06, "loss": 0.4664, "step": 9308 }, { "epoch": 2.3561123766135155, "grad_norm": 0.18150363862514496, "learning_rate": 4.077105939270248e-06, "loss": 0.4537, "step": 9309 }, { "epoch": 2.3563654770944065, "grad_norm": 0.18861372768878937, "learning_rate": 4.075176941556587e-06, "loss": 0.4726, "step": 9310 }, { "epoch": 2.3566185775752975, "grad_norm": 0.184637188911438, "learning_rate": 4.073248283500463e-06, "loss": 0.4421, "step": 9311 }, { "epoch": 2.3568716780561885, "grad_norm": 0.1838478147983551, "learning_rate": 4.071319965212442e-06, "loss": 0.4435, "step": 9312 }, { "epoch": 2.357124778537079, "grad_norm": 0.17934547364711761, "learning_rate": 4.069391986803073e-06, "loss": 0.4318, "step": 9313 }, { "epoch": 2.35737787901797, "grad_norm": 0.1800679862499237, "learning_rate": 4.067464348382881e-06, "loss": 0.4509, "step": 9314 }, { "epoch": 2.357630979498861, "grad_norm": 0.18166671693325043, "learning_rate": 4.065537050062373e-06, "loss": 0.4408, "step": 9315 }, { "epoch": 2.357884079979752, "grad_norm": 0.1786419302225113, "learning_rate": 4.063610091952037e-06, "loss": 0.4554, "step": 9316 }, { "epoch": 2.358137180460643, "grad_norm": 0.17748156189918518, "learning_rate": 4.061683474162347e-06, "loss": 0.4466, "step": 9317 }, { "epoch": 2.358390280941534, "grad_norm": 0.18439772725105286, "learning_rate": 4.059757196803742e-06, "loss": 0.4546, "step": 9318 }, { "epoch": 2.3586433814224246, "grad_norm": 0.17834538221359253, "learning_rate": 4.057831259986653e-06, "loss": 0.4548, "step": 9319 }, { "epoch": 2.3588964819033156, "grad_norm": 0.18331794440746307, "learning_rate": 4.055905663821497e-06, "loss": 0.468, "step": 9320 }, { "epoch": 2.3591495823842066, "grad_norm": 0.1864652931690216, "learning_rate": 4.053980408418659e-06, "loss": 0.453, "step": 9321 }, { "epoch": 2.3594026828650976, "grad_norm": 0.178125262260437, "learning_rate": 4.052055493888513e-06, "loss": 0.4572, "step": 9322 }, { "epoch": 2.359655783345988, "grad_norm": 0.18269698321819305, "learning_rate": 4.050130920341411e-06, "loss": 0.4702, "step": 9323 }, { "epoch": 2.359908883826879, "grad_norm": 0.186616912484169, "learning_rate": 4.048206687887679e-06, "loss": 0.4621, "step": 9324 }, { "epoch": 2.36016198430777, "grad_norm": 0.1824035793542862, "learning_rate": 4.046282796637628e-06, "loss": 0.4816, "step": 9325 }, { "epoch": 2.360415084788661, "grad_norm": 0.18782205879688263, "learning_rate": 4.0443592467015605e-06, "loss": 0.4415, "step": 9326 }, { "epoch": 2.360668185269552, "grad_norm": 0.18181711435317993, "learning_rate": 4.042436038189742e-06, "loss": 0.4269, "step": 9327 }, { "epoch": 2.360921285750443, "grad_norm": 0.1816590577363968, "learning_rate": 4.040513171212433e-06, "loss": 0.4193, "step": 9328 }, { "epoch": 2.3611743862313337, "grad_norm": 0.19082002341747284, "learning_rate": 4.038590645879858e-06, "loss": 0.4465, "step": 9329 }, { "epoch": 2.3614274867122247, "grad_norm": 0.17912818491458893, "learning_rate": 4.036668462302236e-06, "loss": 0.467, "step": 9330 }, { "epoch": 2.3616805871931157, "grad_norm": 0.18121610581874847, "learning_rate": 4.034746620589761e-06, "loss": 0.4373, "step": 9331 }, { "epoch": 2.3619336876740067, "grad_norm": 0.18124066293239594, "learning_rate": 4.032825120852605e-06, "loss": 0.4364, "step": 9332 }, { "epoch": 2.3621867881548977, "grad_norm": 0.18642786145210266, "learning_rate": 4.0309039632009305e-06, "loss": 0.4395, "step": 9333 }, { "epoch": 2.3624398886357882, "grad_norm": 0.18752418458461761, "learning_rate": 4.028983147744874e-06, "loss": 0.4445, "step": 9334 }, { "epoch": 2.3626929891166792, "grad_norm": 0.1822156012058258, "learning_rate": 4.027062674594544e-06, "loss": 0.4514, "step": 9335 }, { "epoch": 2.3629460895975702, "grad_norm": 0.1919974833726883, "learning_rate": 4.02514254386004e-06, "loss": 0.4658, "step": 9336 }, { "epoch": 2.3631991900784612, "grad_norm": 0.17884482443332672, "learning_rate": 4.023222755651441e-06, "loss": 0.4363, "step": 9337 }, { "epoch": 2.3634522905593522, "grad_norm": 0.22745946049690247, "learning_rate": 4.021303310078799e-06, "loss": 0.4366, "step": 9338 }, { "epoch": 2.363705391040243, "grad_norm": 0.18016579747200012, "learning_rate": 4.0193842072521626e-06, "loss": 0.4431, "step": 9339 }, { "epoch": 2.363958491521134, "grad_norm": 0.9380684494972229, "learning_rate": 4.017465447281541e-06, "loss": 0.4497, "step": 9340 }, { "epoch": 2.364211592002025, "grad_norm": 0.18222840130329132, "learning_rate": 4.015547030276934e-06, "loss": 0.4513, "step": 9341 }, { "epoch": 2.364464692482916, "grad_norm": 0.18108727037906647, "learning_rate": 4.013628956348321e-06, "loss": 0.4423, "step": 9342 }, { "epoch": 2.364717792963807, "grad_norm": 0.1848457157611847, "learning_rate": 4.011711225605662e-06, "loss": 0.4397, "step": 9343 }, { "epoch": 2.3649708934446974, "grad_norm": 0.18364904820919037, "learning_rate": 4.009793838158895e-06, "loss": 0.4723, "step": 9344 }, { "epoch": 2.3652239939255884, "grad_norm": 0.18631422519683838, "learning_rate": 4.007876794117942e-06, "loss": 0.4484, "step": 9345 }, { "epoch": 2.3654770944064794, "grad_norm": 0.18267996609210968, "learning_rate": 4.005960093592701e-06, "loss": 0.4336, "step": 9346 }, { "epoch": 2.3657301948873704, "grad_norm": 0.17966563999652863, "learning_rate": 4.004043736693053e-06, "loss": 0.4474, "step": 9347 }, { "epoch": 2.3659832953682614, "grad_norm": 0.2743167281150818, "learning_rate": 4.002127723528858e-06, "loss": 0.4553, "step": 9348 }, { "epoch": 2.3662363958491524, "grad_norm": 0.20468157529830933, "learning_rate": 4.0002120542099584e-06, "loss": 0.4351, "step": 9349 }, { "epoch": 2.366489496330043, "grad_norm": 0.18093986809253693, "learning_rate": 3.998296728846175e-06, "loss": 0.4656, "step": 9350 }, { "epoch": 2.366742596810934, "grad_norm": 0.18714794516563416, "learning_rate": 3.996381747547307e-06, "loss": 0.4479, "step": 9351 }, { "epoch": 2.366995697291825, "grad_norm": 0.1916830688714981, "learning_rate": 3.994467110423141e-06, "loss": 0.452, "step": 9352 }, { "epoch": 2.367248797772716, "grad_norm": 0.19093599915504456, "learning_rate": 3.9925528175834345e-06, "loss": 0.4928, "step": 9353 }, { "epoch": 2.3675018982536065, "grad_norm": 0.1847158968448639, "learning_rate": 3.990638869137934e-06, "loss": 0.4304, "step": 9354 }, { "epoch": 2.3677549987344975, "grad_norm": 0.1786860078573227, "learning_rate": 3.988725265196358e-06, "loss": 0.437, "step": 9355 }, { "epoch": 2.3680080992153885, "grad_norm": 0.18255704641342163, "learning_rate": 3.986812005868411e-06, "loss": 0.4453, "step": 9356 }, { "epoch": 2.3682611996962795, "grad_norm": 0.18015657365322113, "learning_rate": 3.984899091263776e-06, "loss": 0.4352, "step": 9357 }, { "epoch": 2.3685143001771705, "grad_norm": 0.18186868727207184, "learning_rate": 3.982986521492118e-06, "loss": 0.4411, "step": 9358 }, { "epoch": 2.3687674006580615, "grad_norm": 0.18791881203651428, "learning_rate": 3.981074296663079e-06, "loss": 0.4472, "step": 9359 }, { "epoch": 2.369020501138952, "grad_norm": 0.19182753562927246, "learning_rate": 3.979162416886282e-06, "loss": 0.4249, "step": 9360 }, { "epoch": 2.369273601619843, "grad_norm": 0.1806834191083908, "learning_rate": 3.977250882271333e-06, "loss": 0.4439, "step": 9361 }, { "epoch": 2.369526702100734, "grad_norm": 0.19451557099819183, "learning_rate": 3.975339692927817e-06, "loss": 0.4858, "step": 9362 }, { "epoch": 2.369779802581625, "grad_norm": 0.18481235206127167, "learning_rate": 3.973428848965292e-06, "loss": 0.4521, "step": 9363 }, { "epoch": 2.3700329030625156, "grad_norm": 0.1966688483953476, "learning_rate": 3.9715183504933085e-06, "loss": 0.4457, "step": 9364 }, { "epoch": 2.3702860035434066, "grad_norm": 0.18580497801303864, "learning_rate": 3.969608197621392e-06, "loss": 0.4366, "step": 9365 }, { "epoch": 2.3705391040242976, "grad_norm": 0.18257422745227814, "learning_rate": 3.967698390459045e-06, "loss": 0.4412, "step": 9366 }, { "epoch": 2.3707922045051886, "grad_norm": 0.18695120513439178, "learning_rate": 3.965788929115756e-06, "loss": 0.4519, "step": 9367 }, { "epoch": 2.3710453049860796, "grad_norm": 0.18660572171211243, "learning_rate": 3.963879813700985e-06, "loss": 0.4255, "step": 9368 }, { "epoch": 2.3712984054669706, "grad_norm": 0.18698759377002716, "learning_rate": 3.961971044324176e-06, "loss": 0.4354, "step": 9369 }, { "epoch": 2.371551505947861, "grad_norm": 0.17752894759178162, "learning_rate": 3.960062621094764e-06, "loss": 0.4472, "step": 9370 }, { "epoch": 2.371804606428752, "grad_norm": 0.18463736772537231, "learning_rate": 3.958154544122148e-06, "loss": 0.4544, "step": 9371 }, { "epoch": 2.372057706909643, "grad_norm": 0.17925843596458435, "learning_rate": 3.956246813515716e-06, "loss": 0.438, "step": 9372 }, { "epoch": 2.372310807390534, "grad_norm": 0.18654552102088928, "learning_rate": 3.954339429384838e-06, "loss": 0.4811, "step": 9373 }, { "epoch": 2.372563907871425, "grad_norm": 0.18473608791828156, "learning_rate": 3.952432391838852e-06, "loss": 0.4459, "step": 9374 }, { "epoch": 2.3728170083523157, "grad_norm": 0.17946857213974, "learning_rate": 3.950525700987089e-06, "loss": 0.4623, "step": 9375 }, { "epoch": 2.3730701088332067, "grad_norm": 0.18472716212272644, "learning_rate": 3.948619356938852e-06, "loss": 0.4711, "step": 9376 }, { "epoch": 2.3733232093140977, "grad_norm": 0.18954436480998993, "learning_rate": 3.946713359803433e-06, "loss": 0.4512, "step": 9377 }, { "epoch": 2.3735763097949887, "grad_norm": 0.1817643791437149, "learning_rate": 3.944807709690101e-06, "loss": 0.458, "step": 9378 }, { "epoch": 2.3738294102758797, "grad_norm": 0.1906863898038864, "learning_rate": 3.9429024067080954e-06, "loss": 0.47, "step": 9379 }, { "epoch": 2.3740825107567707, "grad_norm": 0.18659310042858124, "learning_rate": 3.9409974509666475e-06, "loss": 0.4741, "step": 9380 }, { "epoch": 2.3743356112376612, "grad_norm": 0.18623855710029602, "learning_rate": 3.939092842574961e-06, "loss": 0.4597, "step": 9381 }, { "epoch": 2.3745887117185522, "grad_norm": 0.17723993957042694, "learning_rate": 3.937188581642221e-06, "loss": 0.449, "step": 9382 }, { "epoch": 2.3748418121994432, "grad_norm": 0.18263380229473114, "learning_rate": 3.935284668277605e-06, "loss": 0.4446, "step": 9383 }, { "epoch": 2.3750949126803342, "grad_norm": 0.18557724356651306, "learning_rate": 3.9333811025902555e-06, "loss": 0.4745, "step": 9384 }, { "epoch": 2.375348013161225, "grad_norm": 0.18897086381912231, "learning_rate": 3.931477884689296e-06, "loss": 0.4423, "step": 9385 }, { "epoch": 2.375601113642116, "grad_norm": 0.18089087307453156, "learning_rate": 3.9295750146838375e-06, "loss": 0.4542, "step": 9386 }, { "epoch": 2.375854214123007, "grad_norm": 0.18598873913288116, "learning_rate": 3.927672492682965e-06, "loss": 0.4654, "step": 9387 }, { "epoch": 2.376107314603898, "grad_norm": 0.18669499456882477, "learning_rate": 3.925770318795748e-06, "loss": 0.4299, "step": 9388 }, { "epoch": 2.376360415084789, "grad_norm": 0.18122124671936035, "learning_rate": 3.923868493131234e-06, "loss": 0.449, "step": 9389 }, { "epoch": 2.37661351556568, "grad_norm": 0.18508046865463257, "learning_rate": 3.92196701579845e-06, "loss": 0.445, "step": 9390 }, { "epoch": 2.3768666160465703, "grad_norm": 0.18702881038188934, "learning_rate": 3.920065886906405e-06, "loss": 0.4803, "step": 9391 }, { "epoch": 2.3771197165274613, "grad_norm": 0.18382248282432556, "learning_rate": 3.9181651065640844e-06, "loss": 0.4393, "step": 9392 }, { "epoch": 2.3773728170083523, "grad_norm": 0.18248935043811798, "learning_rate": 3.916264674880458e-06, "loss": 0.4527, "step": 9393 }, { "epoch": 2.3776259174892433, "grad_norm": 0.1834108978509903, "learning_rate": 3.9143645919644725e-06, "loss": 0.4477, "step": 9394 }, { "epoch": 2.377879017970134, "grad_norm": 0.18676932156085968, "learning_rate": 3.912464857925056e-06, "loss": 0.466, "step": 9395 }, { "epoch": 2.378132118451025, "grad_norm": 0.1848004162311554, "learning_rate": 3.910565472871115e-06, "loss": 0.4597, "step": 9396 }, { "epoch": 2.378385218931916, "grad_norm": 0.1848173290491104, "learning_rate": 3.90866643691154e-06, "loss": 0.4539, "step": 9397 }, { "epoch": 2.378638319412807, "grad_norm": 0.18084511160850525, "learning_rate": 3.906767750155196e-06, "loss": 0.4483, "step": 9398 }, { "epoch": 2.378891419893698, "grad_norm": 0.18572697043418884, "learning_rate": 3.904869412710931e-06, "loss": 0.4668, "step": 9399 }, { "epoch": 2.379144520374589, "grad_norm": 0.18171533942222595, "learning_rate": 3.902971424687575e-06, "loss": 0.4276, "step": 9400 }, { "epoch": 2.3793976208554795, "grad_norm": 0.1836734116077423, "learning_rate": 3.901073786193932e-06, "loss": 0.4608, "step": 9401 }, { "epoch": 2.3796507213363705, "grad_norm": 0.18460670113563538, "learning_rate": 3.8991764973387925e-06, "loss": 0.4665, "step": 9402 }, { "epoch": 2.3799038218172615, "grad_norm": 0.1773122400045395, "learning_rate": 3.897279558230923e-06, "loss": 0.4433, "step": 9403 }, { "epoch": 2.3801569222981525, "grad_norm": 0.18014584481716156, "learning_rate": 3.895382968979071e-06, "loss": 0.4353, "step": 9404 }, { "epoch": 2.3804100227790435, "grad_norm": 0.1822248250246048, "learning_rate": 3.893486729691965e-06, "loss": 0.454, "step": 9405 }, { "epoch": 2.380663123259934, "grad_norm": 0.18243375420570374, "learning_rate": 3.891590840478314e-06, "loss": 0.462, "step": 9406 }, { "epoch": 2.380916223740825, "grad_norm": 0.17924858629703522, "learning_rate": 3.8896953014467955e-06, "loss": 0.4662, "step": 9407 }, { "epoch": 2.381169324221716, "grad_norm": 0.20061500370502472, "learning_rate": 3.887800112706088e-06, "loss": 0.4606, "step": 9408 }, { "epoch": 2.381422424702607, "grad_norm": 0.18218781054019928, "learning_rate": 3.885905274364834e-06, "loss": 0.4677, "step": 9409 }, { "epoch": 2.381675525183498, "grad_norm": 0.1858220249414444, "learning_rate": 3.884010786531661e-06, "loss": 0.4609, "step": 9410 }, { "epoch": 2.381928625664389, "grad_norm": 0.18141008913516998, "learning_rate": 3.88211664931518e-06, "loss": 0.4643, "step": 9411 }, { "epoch": 2.3821817261452796, "grad_norm": 0.19395402073860168, "learning_rate": 3.8802228628239715e-06, "loss": 0.4699, "step": 9412 }, { "epoch": 2.3824348266261706, "grad_norm": 0.18039491772651672, "learning_rate": 3.878329427166603e-06, "loss": 0.4497, "step": 9413 }, { "epoch": 2.3826879271070616, "grad_norm": 0.18115781247615814, "learning_rate": 3.876436342451621e-06, "loss": 0.4369, "step": 9414 }, { "epoch": 2.3829410275879526, "grad_norm": 0.18248705565929413, "learning_rate": 3.874543608787556e-06, "loss": 0.44, "step": 9415 }, { "epoch": 2.383194128068843, "grad_norm": 0.18013828992843628, "learning_rate": 3.872651226282913e-06, "loss": 0.4567, "step": 9416 }, { "epoch": 2.383447228549734, "grad_norm": 0.18155579268932343, "learning_rate": 3.870759195046181e-06, "loss": 0.449, "step": 9417 }, { "epoch": 2.383700329030625, "grad_norm": 0.18372465670108795, "learning_rate": 3.868867515185819e-06, "loss": 0.4454, "step": 9418 }, { "epoch": 2.383953429511516, "grad_norm": 0.18018677830696106, "learning_rate": 3.866976186810276e-06, "loss": 0.4378, "step": 9419 }, { "epoch": 2.384206529992407, "grad_norm": 0.19497281312942505, "learning_rate": 3.865085210027977e-06, "loss": 0.4603, "step": 9420 }, { "epoch": 2.384459630473298, "grad_norm": 0.18294572830200195, "learning_rate": 3.863194584947332e-06, "loss": 0.4592, "step": 9421 }, { "epoch": 2.3847127309541887, "grad_norm": 0.18423005938529968, "learning_rate": 3.861304311676722e-06, "loss": 0.4538, "step": 9422 }, { "epoch": 2.3849658314350797, "grad_norm": 0.18163283169269562, "learning_rate": 3.859414390324519e-06, "loss": 0.4679, "step": 9423 }, { "epoch": 2.3852189319159707, "grad_norm": 0.18555666506290436, "learning_rate": 3.8575248209990615e-06, "loss": 0.465, "step": 9424 }, { "epoch": 2.3854720323968617, "grad_norm": 0.19719010591506958, "learning_rate": 3.855635603808674e-06, "loss": 0.4564, "step": 9425 }, { "epoch": 2.3857251328777522, "grad_norm": 0.1811579018831253, "learning_rate": 3.8537467388616646e-06, "loss": 0.4525, "step": 9426 }, { "epoch": 2.3859782333586432, "grad_norm": 0.18544445931911469, "learning_rate": 3.851858226266314e-06, "loss": 0.4388, "step": 9427 }, { "epoch": 2.3862313338395342, "grad_norm": 0.18047399818897247, "learning_rate": 3.849970066130896e-06, "loss": 0.4407, "step": 9428 }, { "epoch": 2.3864844343204252, "grad_norm": 0.18840688467025757, "learning_rate": 3.848082258563645e-06, "loss": 0.4404, "step": 9429 }, { "epoch": 2.3867375348013162, "grad_norm": 0.18085268139839172, "learning_rate": 3.846194803672788e-06, "loss": 0.4732, "step": 9430 }, { "epoch": 2.3869906352822072, "grad_norm": 0.19662132859230042, "learning_rate": 3.844307701566528e-06, "loss": 0.4381, "step": 9431 }, { "epoch": 2.387243735763098, "grad_norm": 0.18205837905406952, "learning_rate": 3.842420952353052e-06, "loss": 0.4401, "step": 9432 }, { "epoch": 2.387496836243989, "grad_norm": 0.18278808891773224, "learning_rate": 3.840534556140517e-06, "loss": 0.4694, "step": 9433 }, { "epoch": 2.38774993672488, "grad_norm": 0.18333812057971954, "learning_rate": 3.838648513037076e-06, "loss": 0.4703, "step": 9434 }, { "epoch": 2.388003037205771, "grad_norm": 0.1830158233642578, "learning_rate": 3.8367628231508445e-06, "loss": 0.4478, "step": 9435 }, { "epoch": 2.388256137686662, "grad_norm": 0.18078464269638062, "learning_rate": 3.834877486589926e-06, "loss": 0.4435, "step": 9436 }, { "epoch": 2.3885092381675523, "grad_norm": 0.18357235193252563, "learning_rate": 3.832992503462403e-06, "loss": 0.4657, "step": 9437 }, { "epoch": 2.3887623386484433, "grad_norm": 0.2078772336244583, "learning_rate": 3.8311078738763374e-06, "loss": 0.4419, "step": 9438 }, { "epoch": 2.3890154391293343, "grad_norm": 0.18953175842761993, "learning_rate": 3.8292235979397725e-06, "loss": 0.4425, "step": 9439 }, { "epoch": 2.3892685396102253, "grad_norm": 0.18167854845523834, "learning_rate": 3.8273396757607286e-06, "loss": 0.4528, "step": 9440 }, { "epoch": 2.3895216400911163, "grad_norm": 0.18412336707115173, "learning_rate": 3.825456107447207e-06, "loss": 0.4584, "step": 9441 }, { "epoch": 2.389774740572007, "grad_norm": 0.1823815405368805, "learning_rate": 3.82357289310719e-06, "loss": 0.4412, "step": 9442 }, { "epoch": 2.390027841052898, "grad_norm": 0.18343773484230042, "learning_rate": 3.821690032848638e-06, "loss": 0.4284, "step": 9443 }, { "epoch": 2.390280941533789, "grad_norm": 0.1831696331501007, "learning_rate": 3.8198075267794895e-06, "loss": 0.4322, "step": 9444 }, { "epoch": 2.39053404201468, "grad_norm": 0.18165935575962067, "learning_rate": 3.817925375007666e-06, "loss": 0.4474, "step": 9445 }, { "epoch": 2.390787142495571, "grad_norm": 0.1780257374048233, "learning_rate": 3.816043577641068e-06, "loss": 0.4523, "step": 9446 }, { "epoch": 2.3910402429764614, "grad_norm": 0.18325378000736237, "learning_rate": 3.8141621347875724e-06, "loss": 0.4636, "step": 9447 }, { "epoch": 2.3912933434573524, "grad_norm": 0.19030135869979858, "learning_rate": 3.8122810465550408e-06, "loss": 0.4337, "step": 9448 }, { "epoch": 2.3915464439382434, "grad_norm": 0.18652626872062683, "learning_rate": 3.8104003130513113e-06, "loss": 0.4682, "step": 9449 }, { "epoch": 2.3917995444191344, "grad_norm": 0.19127750396728516, "learning_rate": 3.808519934384205e-06, "loss": 0.4422, "step": 9450 }, { "epoch": 2.3920526449000254, "grad_norm": 0.18400736153125763, "learning_rate": 3.8066399106615105e-06, "loss": 0.4523, "step": 9451 }, { "epoch": 2.3923057453809164, "grad_norm": 0.19284376502037048, "learning_rate": 3.8047602419910155e-06, "loss": 0.4707, "step": 9452 }, { "epoch": 2.392558845861807, "grad_norm": 0.18438464403152466, "learning_rate": 3.8028809284804747e-06, "loss": 0.4507, "step": 9453 }, { "epoch": 2.392811946342698, "grad_norm": 0.17780044674873352, "learning_rate": 3.8010019702376256e-06, "loss": 0.457, "step": 9454 }, { "epoch": 2.393065046823589, "grad_norm": 0.194447860121727, "learning_rate": 3.799123367370182e-06, "loss": 0.4527, "step": 9455 }, { "epoch": 2.39331814730448, "grad_norm": 0.17875222861766815, "learning_rate": 3.7972451199858474e-06, "loss": 0.4283, "step": 9456 }, { "epoch": 2.3935712477853706, "grad_norm": 0.18362222611904144, "learning_rate": 3.795367228192288e-06, "loss": 0.4455, "step": 9457 }, { "epoch": 2.3938243482662616, "grad_norm": 0.19136039912700653, "learning_rate": 3.793489692097161e-06, "loss": 0.4691, "step": 9458 }, { "epoch": 2.3940774487471526, "grad_norm": 0.18980936706066132, "learning_rate": 3.791612511808107e-06, "loss": 0.4553, "step": 9459 }, { "epoch": 2.3943305492280436, "grad_norm": 0.18633680045604706, "learning_rate": 3.7897356874327397e-06, "loss": 0.4274, "step": 9460 }, { "epoch": 2.3945836497089346, "grad_norm": 0.18610960245132446, "learning_rate": 3.7878592190786544e-06, "loss": 0.4497, "step": 9461 }, { "epoch": 2.3948367501898256, "grad_norm": 0.1800883263349533, "learning_rate": 3.7859831068534193e-06, "loss": 0.4608, "step": 9462 }, { "epoch": 2.395089850670716, "grad_norm": 0.1774851381778717, "learning_rate": 3.7841073508645908e-06, "loss": 0.4444, "step": 9463 }, { "epoch": 2.395342951151607, "grad_norm": 0.18259018659591675, "learning_rate": 3.782231951219699e-06, "loss": 0.471, "step": 9464 }, { "epoch": 2.395596051632498, "grad_norm": 0.18246744573116302, "learning_rate": 3.7803569080262646e-06, "loss": 0.434, "step": 9465 }, { "epoch": 2.395849152113389, "grad_norm": 0.18225596845149994, "learning_rate": 3.778482221391775e-06, "loss": 0.4608, "step": 9466 }, { "epoch": 2.39610225259428, "grad_norm": 0.18097048997879028, "learning_rate": 3.776607891423705e-06, "loss": 0.4533, "step": 9467 }, { "epoch": 2.3963553530751707, "grad_norm": 0.18392060697078705, "learning_rate": 3.7747339182294994e-06, "loss": 0.4239, "step": 9468 }, { "epoch": 2.3966084535560617, "grad_norm": 0.18550926446914673, "learning_rate": 3.7728603019165945e-06, "loss": 0.4567, "step": 9469 }, { "epoch": 2.3968615540369527, "grad_norm": 0.18245021998882294, "learning_rate": 3.7709870425923977e-06, "loss": 0.4585, "step": 9470 }, { "epoch": 2.3971146545178437, "grad_norm": 0.18391768634319305, "learning_rate": 3.7691141403642984e-06, "loss": 0.4645, "step": 9471 }, { "epoch": 2.3973677549987347, "grad_norm": 0.1818842887878418, "learning_rate": 3.7672415953396745e-06, "loss": 0.4617, "step": 9472 }, { "epoch": 2.3976208554796252, "grad_norm": 0.18618451058864594, "learning_rate": 3.765369407625865e-06, "loss": 0.4429, "step": 9473 }, { "epoch": 2.3978739559605162, "grad_norm": 0.1813684105873108, "learning_rate": 3.763497577330203e-06, "loss": 0.4529, "step": 9474 }, { "epoch": 2.3981270564414072, "grad_norm": 0.18525508046150208, "learning_rate": 3.7616261045599946e-06, "loss": 0.4546, "step": 9475 }, { "epoch": 2.3983801569222982, "grad_norm": 0.17735932767391205, "learning_rate": 3.7597549894225293e-06, "loss": 0.456, "step": 9476 }, { "epoch": 2.3986332574031892, "grad_norm": 0.18612927198410034, "learning_rate": 3.7578842320250697e-06, "loss": 0.4511, "step": 9477 }, { "epoch": 2.39888635788408, "grad_norm": 0.18729902803897858, "learning_rate": 3.756013832474873e-06, "loss": 0.4608, "step": 9478 }, { "epoch": 2.399139458364971, "grad_norm": 0.1756046712398529, "learning_rate": 3.7541437908791554e-06, "loss": 0.4455, "step": 9479 }, { "epoch": 2.399392558845862, "grad_norm": 0.18453191220760345, "learning_rate": 3.7522741073451243e-06, "loss": 0.4412, "step": 9480 }, { "epoch": 2.3996456593267528, "grad_norm": 0.17776282131671906, "learning_rate": 3.750404781979966e-06, "loss": 0.4441, "step": 9481 }, { "epoch": 2.3998987598076438, "grad_norm": 0.17937254905700684, "learning_rate": 3.7485358148908448e-06, "loss": 0.4555, "step": 9482 }, { "epoch": 2.4001518602885348, "grad_norm": 0.18484480679035187, "learning_rate": 3.7466672061849042e-06, "loss": 0.4749, "step": 9483 }, { "epoch": 2.4004049607694253, "grad_norm": 0.1842249631881714, "learning_rate": 3.744798955969269e-06, "loss": 0.4569, "step": 9484 }, { "epoch": 2.4006580612503163, "grad_norm": 0.1799848973751068, "learning_rate": 3.74293106435104e-06, "loss": 0.4224, "step": 9485 }, { "epoch": 2.4009111617312073, "grad_norm": 0.1785549521446228, "learning_rate": 3.741063531437299e-06, "loss": 0.4814, "step": 9486 }, { "epoch": 2.4011642622120983, "grad_norm": 0.1815781444311142, "learning_rate": 3.7391963573351107e-06, "loss": 0.4783, "step": 9487 }, { "epoch": 2.401417362692989, "grad_norm": 0.18817676603794098, "learning_rate": 3.7373295421515143e-06, "loss": 0.4624, "step": 9488 }, { "epoch": 2.40167046317388, "grad_norm": 0.1872502565383911, "learning_rate": 3.7354630859935306e-06, "loss": 0.4487, "step": 9489 }, { "epoch": 2.401923563654771, "grad_norm": 0.1792176216840744, "learning_rate": 3.733596988968159e-06, "loss": 0.4867, "step": 9490 }, { "epoch": 2.402176664135662, "grad_norm": 0.18499748408794403, "learning_rate": 3.73173125118238e-06, "loss": 0.4582, "step": 9491 }, { "epoch": 2.402429764616553, "grad_norm": 0.1832091510295868, "learning_rate": 3.7298658727431515e-06, "loss": 0.4621, "step": 9492 }, { "epoch": 2.402682865097444, "grad_norm": 0.18693436682224274, "learning_rate": 3.728000853757412e-06, "loss": 0.4809, "step": 9493 }, { "epoch": 2.4029359655783344, "grad_norm": 0.1854068487882614, "learning_rate": 3.7261361943320797e-06, "loss": 0.4226, "step": 9494 }, { "epoch": 2.4031890660592254, "grad_norm": 0.18282026052474976, "learning_rate": 3.72427189457405e-06, "loss": 0.4454, "step": 9495 }, { "epoch": 2.4034421665401164, "grad_norm": 0.19380897283554077, "learning_rate": 3.7224079545902003e-06, "loss": 0.4463, "step": 9496 }, { "epoch": 2.4036952670210074, "grad_norm": 0.18478929996490479, "learning_rate": 3.720544374487387e-06, "loss": 0.432, "step": 9497 }, { "epoch": 2.4039483675018984, "grad_norm": 0.18707741796970367, "learning_rate": 3.7186811543724443e-06, "loss": 0.4542, "step": 9498 }, { "epoch": 2.404201467982789, "grad_norm": 0.17862622439861298, "learning_rate": 3.7168182943521868e-06, "loss": 0.4325, "step": 9499 }, { "epoch": 2.40445456846368, "grad_norm": 0.180389866232872, "learning_rate": 3.7149557945334113e-06, "loss": 0.4599, "step": 9500 }, { "epoch": 2.404707668944571, "grad_norm": 0.1822456270456314, "learning_rate": 3.7130936550228856e-06, "loss": 0.455, "step": 9501 }, { "epoch": 2.404960769425462, "grad_norm": 0.1916583627462387, "learning_rate": 3.7112318759273603e-06, "loss": 0.4683, "step": 9502 }, { "epoch": 2.405213869906353, "grad_norm": 0.1843297779560089, "learning_rate": 3.7093704573535747e-06, "loss": 0.4608, "step": 9503 }, { "epoch": 2.4054669703872436, "grad_norm": 0.18889828026294708, "learning_rate": 3.7075093994082377e-06, "loss": 0.471, "step": 9504 }, { "epoch": 2.4057200708681346, "grad_norm": 0.18655535578727722, "learning_rate": 3.7056487021980393e-06, "loss": 0.4713, "step": 9505 }, { "epoch": 2.4059731713490256, "grad_norm": 0.26987791061401367, "learning_rate": 3.7037883658296515e-06, "loss": 0.433, "step": 9506 }, { "epoch": 2.4062262718299166, "grad_norm": 0.18099287152290344, "learning_rate": 3.701928390409717e-06, "loss": 0.4412, "step": 9507 }, { "epoch": 2.4064793723108076, "grad_norm": 0.1838638037443161, "learning_rate": 3.7000687760448662e-06, "loss": 0.448, "step": 9508 }, { "epoch": 2.406732472791698, "grad_norm": 0.1869095414876938, "learning_rate": 3.6982095228417102e-06, "loss": 0.4631, "step": 9509 }, { "epoch": 2.406985573272589, "grad_norm": 0.1862931251525879, "learning_rate": 3.6963506309068366e-06, "loss": 0.4563, "step": 9510 }, { "epoch": 2.40723867375348, "grad_norm": 0.18511426448822021, "learning_rate": 3.6944921003468126e-06, "loss": 0.4679, "step": 9511 }, { "epoch": 2.407491774234371, "grad_norm": 0.1870812475681305, "learning_rate": 3.692633931268177e-06, "loss": 0.4498, "step": 9512 }, { "epoch": 2.407744874715262, "grad_norm": 0.18867702782154083, "learning_rate": 3.6907761237774587e-06, "loss": 0.4298, "step": 9513 }, { "epoch": 2.407997975196153, "grad_norm": 0.19236506521701813, "learning_rate": 3.6889186779811626e-06, "loss": 0.4514, "step": 9514 }, { "epoch": 2.4082510756770437, "grad_norm": 0.1818554401397705, "learning_rate": 3.687061593985767e-06, "loss": 0.453, "step": 9515 }, { "epoch": 2.4085041761579347, "grad_norm": 0.17940294742584229, "learning_rate": 3.685204871897743e-06, "loss": 0.4561, "step": 9516 }, { "epoch": 2.4087572766388257, "grad_norm": 0.1826905459165573, "learning_rate": 3.6833485118235314e-06, "loss": 0.4345, "step": 9517 }, { "epoch": 2.4090103771197167, "grad_norm": 0.18389837443828583, "learning_rate": 3.681492513869548e-06, "loss": 0.4421, "step": 9518 }, { "epoch": 2.409263477600607, "grad_norm": 0.18794573843479156, "learning_rate": 3.6796368781421944e-06, "loss": 0.4337, "step": 9519 }, { "epoch": 2.409516578081498, "grad_norm": 0.18144002556800842, "learning_rate": 3.677781604747852e-06, "loss": 0.4531, "step": 9520 }, { "epoch": 2.409769678562389, "grad_norm": 0.1767881065607071, "learning_rate": 3.675926693792876e-06, "loss": 0.4478, "step": 9521 }, { "epoch": 2.41002277904328, "grad_norm": 0.188548281788826, "learning_rate": 3.6740721453836146e-06, "loss": 0.4511, "step": 9522 }, { "epoch": 2.410275879524171, "grad_norm": 0.18314018845558167, "learning_rate": 3.672217959626375e-06, "loss": 0.4677, "step": 9523 }, { "epoch": 2.410528980005062, "grad_norm": 0.17929528653621674, "learning_rate": 3.6703641366274566e-06, "loss": 0.4702, "step": 9524 }, { "epoch": 2.4107820804859528, "grad_norm": 0.1958521604537964, "learning_rate": 3.6685106764931357e-06, "loss": 0.4356, "step": 9525 }, { "epoch": 2.4110351809668438, "grad_norm": 0.18838848173618317, "learning_rate": 3.6666575793296677e-06, "loss": 0.4347, "step": 9526 }, { "epoch": 2.4112882814477348, "grad_norm": 0.19508683681488037, "learning_rate": 3.664804845243285e-06, "loss": 0.4508, "step": 9527 }, { "epoch": 2.4115413819286258, "grad_norm": 0.18699268996715546, "learning_rate": 3.6629524743402024e-06, "loss": 0.4319, "step": 9528 }, { "epoch": 2.4117944824095168, "grad_norm": 0.18004350364208221, "learning_rate": 3.6611004667266126e-06, "loss": 0.4494, "step": 9529 }, { "epoch": 2.4120475828904073, "grad_norm": 0.17976437509059906, "learning_rate": 3.6592488225086864e-06, "loss": 0.4473, "step": 9530 }, { "epoch": 2.4123006833712983, "grad_norm": 0.1809689998626709, "learning_rate": 3.6573975417925744e-06, "loss": 0.438, "step": 9531 }, { "epoch": 2.4125537838521893, "grad_norm": 0.1856929361820221, "learning_rate": 3.6555466246844074e-06, "loss": 0.4809, "step": 9532 }, { "epoch": 2.4128068843330803, "grad_norm": 0.1820024847984314, "learning_rate": 3.653696071290295e-06, "loss": 0.4582, "step": 9533 }, { "epoch": 2.4130599848139713, "grad_norm": 0.18415558338165283, "learning_rate": 3.6518458817163237e-06, "loss": 0.4636, "step": 9534 }, { "epoch": 2.413313085294862, "grad_norm": 0.17635267972946167, "learning_rate": 3.6499960560685634e-06, "loss": 0.4411, "step": 9535 }, { "epoch": 2.413566185775753, "grad_norm": 0.18044452369213104, "learning_rate": 3.648146594453058e-06, "loss": 0.427, "step": 9536 }, { "epoch": 2.413819286256644, "grad_norm": 0.18007291853427887, "learning_rate": 3.646297496975835e-06, "loss": 0.4519, "step": 9537 }, { "epoch": 2.414072386737535, "grad_norm": 0.18414059281349182, "learning_rate": 3.6444487637428993e-06, "loss": 0.4676, "step": 9538 }, { "epoch": 2.414325487218426, "grad_norm": 0.1813613772392273, "learning_rate": 3.6426003948602327e-06, "loss": 0.4465, "step": 9539 }, { "epoch": 2.4145785876993164, "grad_norm": 0.18905657529830933, "learning_rate": 3.6407523904338014e-06, "loss": 0.4522, "step": 9540 }, { "epoch": 2.4148316881802074, "grad_norm": 0.18902526795864105, "learning_rate": 3.638904750569545e-06, "loss": 0.4443, "step": 9541 }, { "epoch": 2.4150847886610984, "grad_norm": 0.1917349398136139, "learning_rate": 3.637057475373387e-06, "loss": 0.4483, "step": 9542 }, { "epoch": 2.4153378891419894, "grad_norm": 0.18775570392608643, "learning_rate": 3.6352105649512258e-06, "loss": 0.4754, "step": 9543 }, { "epoch": 2.4155909896228804, "grad_norm": 0.18134769797325134, "learning_rate": 3.6333640194089414e-06, "loss": 0.4482, "step": 9544 }, { "epoch": 2.4158440901037714, "grad_norm": 0.18594896793365479, "learning_rate": 3.6315178388523964e-06, "loss": 0.4472, "step": 9545 }, { "epoch": 2.416097190584662, "grad_norm": 0.1822948306798935, "learning_rate": 3.6296720233874185e-06, "loss": 0.4397, "step": 9546 }, { "epoch": 2.416350291065553, "grad_norm": 0.18365472555160522, "learning_rate": 3.6278265731198327e-06, "loss": 0.4593, "step": 9547 }, { "epoch": 2.416603391546444, "grad_norm": 0.18240393698215485, "learning_rate": 3.6259814881554334e-06, "loss": 0.4552, "step": 9548 }, { "epoch": 2.416856492027335, "grad_norm": 0.18204672634601593, "learning_rate": 3.624136768599994e-06, "loss": 0.427, "step": 9549 }, { "epoch": 2.4171095925082255, "grad_norm": 0.19727261364459991, "learning_rate": 3.622292414559272e-06, "loss": 0.4626, "step": 9550 }, { "epoch": 2.4173626929891165, "grad_norm": 0.18619057536125183, "learning_rate": 3.620448426138995e-06, "loss": 0.4549, "step": 9551 }, { "epoch": 2.4176157934700075, "grad_norm": 0.18311142921447754, "learning_rate": 3.618604803444873e-06, "loss": 0.4748, "step": 9552 }, { "epoch": 2.4178688939508985, "grad_norm": 0.18828605115413666, "learning_rate": 3.6167615465826044e-06, "loss": 0.4555, "step": 9553 }, { "epoch": 2.4181219944317895, "grad_norm": 0.21144984662532806, "learning_rate": 3.6149186556578565e-06, "loss": 0.457, "step": 9554 }, { "epoch": 2.4183750949126805, "grad_norm": 0.6576638221740723, "learning_rate": 3.6130761307762763e-06, "loss": 0.4595, "step": 9555 }, { "epoch": 2.418628195393571, "grad_norm": 0.18575122952461243, "learning_rate": 3.6112339720434988e-06, "loss": 0.4254, "step": 9556 }, { "epoch": 2.418881295874462, "grad_norm": 0.1839270293712616, "learning_rate": 3.60939217956512e-06, "loss": 0.4361, "step": 9557 }, { "epoch": 2.419134396355353, "grad_norm": 0.1786297708749771, "learning_rate": 3.6075507534467336e-06, "loss": 0.4319, "step": 9558 }, { "epoch": 2.419387496836244, "grad_norm": 0.1816621869802475, "learning_rate": 3.6057096937938973e-06, "loss": 0.4679, "step": 9559 }, { "epoch": 2.4196405973171347, "grad_norm": 0.18408305943012238, "learning_rate": 3.6038690007121647e-06, "loss": 0.4482, "step": 9560 }, { "epoch": 2.4198936977980257, "grad_norm": 0.1814432591199875, "learning_rate": 3.6020286743070575e-06, "loss": 0.4423, "step": 9561 }, { "epoch": 2.4201467982789167, "grad_norm": 0.1788274198770523, "learning_rate": 3.600188714684072e-06, "loss": 0.4435, "step": 9562 }, { "epoch": 2.4203998987598077, "grad_norm": 0.1865665167570114, "learning_rate": 3.5983491219486923e-06, "loss": 0.4638, "step": 9563 }, { "epoch": 2.4206529992406987, "grad_norm": 0.17970436811447144, "learning_rate": 3.596509896206377e-06, "loss": 0.4383, "step": 9564 }, { "epoch": 2.4209060997215897, "grad_norm": 0.1963399201631546, "learning_rate": 3.5946710375625658e-06, "loss": 0.4481, "step": 9565 }, { "epoch": 2.42115920020248, "grad_norm": 0.18500499427318573, "learning_rate": 3.5928325461226743e-06, "loss": 0.4579, "step": 9566 }, { "epoch": 2.421412300683371, "grad_norm": 0.18021154403686523, "learning_rate": 3.590994421992109e-06, "loss": 0.4639, "step": 9567 }, { "epoch": 2.421665401164262, "grad_norm": 0.18956807255744934, "learning_rate": 3.5891566652762332e-06, "loss": 0.4507, "step": 9568 }, { "epoch": 2.421918501645153, "grad_norm": 0.18264029920101166, "learning_rate": 3.587319276080409e-06, "loss": 0.4552, "step": 9569 }, { "epoch": 2.422171602126044, "grad_norm": 0.23548932373523712, "learning_rate": 3.5854822545099664e-06, "loss": 0.4456, "step": 9570 }, { "epoch": 2.4224247026069348, "grad_norm": 0.1944740265607834, "learning_rate": 3.583645600670219e-06, "loss": 0.4631, "step": 9571 }, { "epoch": 2.4226778030878258, "grad_norm": 0.17744481563568115, "learning_rate": 3.58180931466646e-06, "loss": 0.4476, "step": 9572 }, { "epoch": 2.4229309035687168, "grad_norm": 0.18726980686187744, "learning_rate": 3.5799733966039574e-06, "loss": 0.4481, "step": 9573 }, { "epoch": 2.4231840040496078, "grad_norm": 0.18958747386932373, "learning_rate": 3.5781378465879624e-06, "loss": 0.4357, "step": 9574 }, { "epoch": 2.4234371045304988, "grad_norm": 0.18278442323207855, "learning_rate": 3.576302664723701e-06, "loss": 0.449, "step": 9575 }, { "epoch": 2.4236902050113898, "grad_norm": 0.187727153301239, "learning_rate": 3.5744678511163834e-06, "loss": 0.4724, "step": 9576 }, { "epoch": 2.4239433054922803, "grad_norm": 0.18599937856197357, "learning_rate": 3.572633405871192e-06, "loss": 0.4289, "step": 9577 }, { "epoch": 2.4241964059731713, "grad_norm": 0.18503467738628387, "learning_rate": 3.570799329093294e-06, "loss": 0.452, "step": 9578 }, { "epoch": 2.4244495064540623, "grad_norm": 0.18838563561439514, "learning_rate": 3.568965620887833e-06, "loss": 0.4792, "step": 9579 }, { "epoch": 2.4247026069349533, "grad_norm": 0.18341930210590363, "learning_rate": 3.5671322813599316e-06, "loss": 0.4506, "step": 9580 }, { "epoch": 2.424955707415844, "grad_norm": 0.1873510479927063, "learning_rate": 3.5652993106146903e-06, "loss": 0.4596, "step": 9581 }, { "epoch": 2.425208807896735, "grad_norm": 0.18744975328445435, "learning_rate": 3.5634667087571896e-06, "loss": 0.4335, "step": 9582 }, { "epoch": 2.425461908377626, "grad_norm": 0.1837841272354126, "learning_rate": 3.561634475892489e-06, "loss": 0.4354, "step": 9583 }, { "epoch": 2.425715008858517, "grad_norm": 0.1764218509197235, "learning_rate": 3.5598026121256268e-06, "loss": 0.4289, "step": 9584 }, { "epoch": 2.425968109339408, "grad_norm": 0.17901290953159332, "learning_rate": 3.5579711175616203e-06, "loss": 0.4571, "step": 9585 }, { "epoch": 2.426221209820299, "grad_norm": 0.18762503564357758, "learning_rate": 3.556139992305464e-06, "loss": 0.4403, "step": 9586 }, { "epoch": 2.4264743103011894, "grad_norm": 0.182305708527565, "learning_rate": 3.5543092364621323e-06, "loss": 0.4573, "step": 9587 }, { "epoch": 2.4267274107820804, "grad_norm": 0.18100185692310333, "learning_rate": 3.5524788501365803e-06, "loss": 0.4419, "step": 9588 }, { "epoch": 2.4269805112629714, "grad_norm": 0.1883816123008728, "learning_rate": 3.550648833433742e-06, "loss": 0.4688, "step": 9589 }, { "epoch": 2.4272336117438624, "grad_norm": 0.1833227425813675, "learning_rate": 3.5488191864585186e-06, "loss": 0.4537, "step": 9590 }, { "epoch": 2.427486712224753, "grad_norm": 0.181449294090271, "learning_rate": 3.54698990931581e-06, "loss": 0.4517, "step": 9591 }, { "epoch": 2.427739812705644, "grad_norm": 0.18087856471538544, "learning_rate": 3.5451610021104808e-06, "loss": 0.4513, "step": 9592 }, { "epoch": 2.427992913186535, "grad_norm": 0.18469879031181335, "learning_rate": 3.5433324649473797e-06, "loss": 0.4455, "step": 9593 }, { "epoch": 2.428246013667426, "grad_norm": 0.1839916855096817, "learning_rate": 3.541504297931336e-06, "loss": 0.4664, "step": 9594 }, { "epoch": 2.428499114148317, "grad_norm": 0.18478699028491974, "learning_rate": 3.5396765011671475e-06, "loss": 0.4716, "step": 9595 }, { "epoch": 2.428752214629208, "grad_norm": 0.1829681247472763, "learning_rate": 3.5378490747596007e-06, "loss": 0.4349, "step": 9596 }, { "epoch": 2.4290053151100985, "grad_norm": 0.1890496015548706, "learning_rate": 3.5360220188134563e-06, "loss": 0.4724, "step": 9597 }, { "epoch": 2.4292584155909895, "grad_norm": 0.19094185531139374, "learning_rate": 3.53419533343346e-06, "loss": 0.4638, "step": 9598 }, { "epoch": 2.4295115160718805, "grad_norm": 0.18257848918437958, "learning_rate": 3.532369018724331e-06, "loss": 0.4502, "step": 9599 }, { "epoch": 2.4297646165527715, "grad_norm": 0.18326053023338318, "learning_rate": 3.530543074790769e-06, "loss": 0.4278, "step": 9600 }, { "epoch": 2.4300177170336625, "grad_norm": 0.19164720177650452, "learning_rate": 3.5287175017374464e-06, "loss": 0.4595, "step": 9601 }, { "epoch": 2.430270817514553, "grad_norm": 0.18603092432022095, "learning_rate": 3.5268922996690215e-06, "loss": 0.4399, "step": 9602 }, { "epoch": 2.430523917995444, "grad_norm": 0.17698465287685394, "learning_rate": 3.525067468690128e-06, "loss": 0.4323, "step": 9603 }, { "epoch": 2.430777018476335, "grad_norm": 0.1874256432056427, "learning_rate": 3.523243008905384e-06, "loss": 0.4467, "step": 9604 }, { "epoch": 2.431030118957226, "grad_norm": 0.18425339460372925, "learning_rate": 3.521418920419383e-06, "loss": 0.4335, "step": 9605 }, { "epoch": 2.431283219438117, "grad_norm": 0.18839867413043976, "learning_rate": 3.5195952033366886e-06, "loss": 0.4441, "step": 9606 }, { "epoch": 2.431536319919008, "grad_norm": 0.18863965570926666, "learning_rate": 3.5177718577618556e-06, "loss": 0.4525, "step": 9607 }, { "epoch": 2.4317894203998986, "grad_norm": 0.1876945048570633, "learning_rate": 3.5159488837994115e-06, "loss": 0.464, "step": 9608 }, { "epoch": 2.4320425208807896, "grad_norm": 0.24171103537082672, "learning_rate": 3.5141262815538633e-06, "loss": 0.4557, "step": 9609 }, { "epoch": 2.4322956213616806, "grad_norm": 0.1872011125087738, "learning_rate": 3.5123040511296945e-06, "loss": 0.4626, "step": 9610 }, { "epoch": 2.4325487218425716, "grad_norm": 0.18411123752593994, "learning_rate": 3.510482192631379e-06, "loss": 0.4351, "step": 9611 }, { "epoch": 2.432801822323462, "grad_norm": 0.1806201934814453, "learning_rate": 3.5086607061633502e-06, "loss": 0.4446, "step": 9612 }, { "epoch": 2.433054922804353, "grad_norm": 0.17689597606658936, "learning_rate": 3.5068395918300336e-06, "loss": 0.4523, "step": 9613 }, { "epoch": 2.433308023285244, "grad_norm": 0.18480627238750458, "learning_rate": 3.5050188497358285e-06, "loss": 0.4541, "step": 9614 }, { "epoch": 2.433561123766135, "grad_norm": 0.1933717280626297, "learning_rate": 3.503198479985116e-06, "loss": 0.4401, "step": 9615 }, { "epoch": 2.433814224247026, "grad_norm": 0.18723352253437042, "learning_rate": 3.5013784826822504e-06, "loss": 0.4683, "step": 9616 }, { "epoch": 2.434067324727917, "grad_norm": 0.1864197552204132, "learning_rate": 3.4995588579315775e-06, "loss": 0.4257, "step": 9617 }, { "epoch": 2.4343204252088078, "grad_norm": 0.1868865042924881, "learning_rate": 3.4977396058374034e-06, "loss": 0.4426, "step": 9618 }, { "epoch": 2.4345735256896988, "grad_norm": 0.18360814452171326, "learning_rate": 3.495920726504024e-06, "loss": 0.4548, "step": 9619 }, { "epoch": 2.4348266261705898, "grad_norm": 0.18612436950206757, "learning_rate": 3.494102220035713e-06, "loss": 0.4244, "step": 9620 }, { "epoch": 2.4350797266514808, "grad_norm": 0.18704311549663544, "learning_rate": 3.4922840865367212e-06, "loss": 0.4532, "step": 9621 }, { "epoch": 2.4353328271323713, "grad_norm": 0.18466337025165558, "learning_rate": 3.4904663261112793e-06, "loss": 0.454, "step": 9622 }, { "epoch": 2.4355859276132623, "grad_norm": 0.1812952756881714, "learning_rate": 3.4886489388635935e-06, "loss": 0.4137, "step": 9623 }, { "epoch": 2.4358390280941533, "grad_norm": 0.19034729897975922, "learning_rate": 3.4868319248978524e-06, "loss": 0.469, "step": 9624 }, { "epoch": 2.4360921285750443, "grad_norm": 0.178182914853096, "learning_rate": 3.4850152843182193e-06, "loss": 0.437, "step": 9625 }, { "epoch": 2.4363452290559353, "grad_norm": 0.18654285371303558, "learning_rate": 3.483199017228842e-06, "loss": 0.4398, "step": 9626 }, { "epoch": 2.4365983295368263, "grad_norm": 0.1862965077161789, "learning_rate": 3.4813831237338402e-06, "loss": 0.45, "step": 9627 }, { "epoch": 2.436851430017717, "grad_norm": 0.1844399869441986, "learning_rate": 3.4795676039373174e-06, "loss": 0.4661, "step": 9628 }, { "epoch": 2.437104530498608, "grad_norm": 0.18553486466407776, "learning_rate": 3.477752457943352e-06, "loss": 0.453, "step": 9629 }, { "epoch": 2.437357630979499, "grad_norm": 0.18927212059497833, "learning_rate": 3.4759376858560024e-06, "loss": 0.4605, "step": 9630 }, { "epoch": 2.43761073146039, "grad_norm": 0.1839882731437683, "learning_rate": 3.4741232877793074e-06, "loss": 0.4581, "step": 9631 }, { "epoch": 2.437863831941281, "grad_norm": 0.18383565545082092, "learning_rate": 3.4723092638172805e-06, "loss": 0.447, "step": 9632 }, { "epoch": 2.4381169324221714, "grad_norm": 0.18433067202568054, "learning_rate": 3.4704956140739198e-06, "loss": 0.4568, "step": 9633 }, { "epoch": 2.4383700329030624, "grad_norm": 0.18352359533309937, "learning_rate": 3.4686823386531886e-06, "loss": 0.4548, "step": 9634 }, { "epoch": 2.4386231333839534, "grad_norm": 0.1822059154510498, "learning_rate": 3.4668694376590473e-06, "loss": 0.4417, "step": 9635 }, { "epoch": 2.4388762338648444, "grad_norm": 0.18684431910514832, "learning_rate": 3.4650569111954234e-06, "loss": 0.4497, "step": 9636 }, { "epoch": 2.4391293343457354, "grad_norm": 0.1841500699520111, "learning_rate": 3.4632447593662244e-06, "loss": 0.459, "step": 9637 }, { "epoch": 2.439382434826626, "grad_norm": 0.1821271926164627, "learning_rate": 3.4614329822753365e-06, "loss": 0.4458, "step": 9638 }, { "epoch": 2.439635535307517, "grad_norm": 0.18369203805923462, "learning_rate": 3.4596215800266297e-06, "loss": 0.4366, "step": 9639 }, { "epoch": 2.439888635788408, "grad_norm": 0.1886109560728073, "learning_rate": 3.4578105527239416e-06, "loss": 0.478, "step": 9640 }, { "epoch": 2.440141736269299, "grad_norm": 0.18254804611206055, "learning_rate": 3.455999900471092e-06, "loss": 0.4654, "step": 9641 }, { "epoch": 2.44039483675019, "grad_norm": 0.18388397991657257, "learning_rate": 3.4541896233718907e-06, "loss": 0.4604, "step": 9642 }, { "epoch": 2.4406479372310805, "grad_norm": 0.18383893370628357, "learning_rate": 3.4523797215301137e-06, "loss": 0.4572, "step": 9643 }, { "epoch": 2.4409010377119715, "grad_norm": 0.1838883012533188, "learning_rate": 3.4505701950495205e-06, "loss": 0.4298, "step": 9644 }, { "epoch": 2.4411541381928625, "grad_norm": 0.18129873275756836, "learning_rate": 3.4487610440338424e-06, "loss": 0.4493, "step": 9645 }, { "epoch": 2.4414072386737535, "grad_norm": 0.20824487507343292, "learning_rate": 3.4469522685867963e-06, "loss": 0.4212, "step": 9646 }, { "epoch": 2.4416603391546445, "grad_norm": 0.1808096468448639, "learning_rate": 3.445143868812073e-06, "loss": 0.4361, "step": 9647 }, { "epoch": 2.4419134396355355, "grad_norm": 0.18641968071460724, "learning_rate": 3.4433358448133512e-06, "loss": 0.4697, "step": 9648 }, { "epoch": 2.442166540116426, "grad_norm": 0.18693755567073822, "learning_rate": 3.441528196694276e-06, "loss": 0.4361, "step": 9649 }, { "epoch": 2.442419640597317, "grad_norm": 0.18713784217834473, "learning_rate": 3.439720924558482e-06, "loss": 0.443, "step": 9650 }, { "epoch": 2.442672741078208, "grad_norm": 0.2106381505727768, "learning_rate": 3.4379140285095668e-06, "loss": 0.4623, "step": 9651 }, { "epoch": 2.442925841559099, "grad_norm": 0.18999581038951874, "learning_rate": 3.4361075086511207e-06, "loss": 0.4537, "step": 9652 }, { "epoch": 2.4431789420399896, "grad_norm": 0.18854109942913055, "learning_rate": 3.4343013650867087e-06, "loss": 0.4667, "step": 9653 }, { "epoch": 2.4434320425208806, "grad_norm": 0.2946581244468689, "learning_rate": 3.432495597919867e-06, "loss": 0.4405, "step": 9654 }, { "epoch": 2.4436851430017716, "grad_norm": 0.18733111023902893, "learning_rate": 3.430690207254129e-06, "loss": 0.4731, "step": 9655 }, { "epoch": 2.4439382434826626, "grad_norm": 0.18217481672763824, "learning_rate": 3.4288851931929823e-06, "loss": 0.4448, "step": 9656 }, { "epoch": 2.4441913439635536, "grad_norm": 0.18588267266750336, "learning_rate": 3.427080555839909e-06, "loss": 0.4628, "step": 9657 }, { "epoch": 2.4444444444444446, "grad_norm": 0.18070441484451294, "learning_rate": 3.4252762952983654e-06, "loss": 0.446, "step": 9658 }, { "epoch": 2.444697544925335, "grad_norm": 0.18838246166706085, "learning_rate": 3.4234724116717843e-06, "loss": 0.4583, "step": 9659 }, { "epoch": 2.444950645406226, "grad_norm": 0.17891786992549896, "learning_rate": 3.4216689050635766e-06, "loss": 0.4438, "step": 9660 }, { "epoch": 2.445203745887117, "grad_norm": 0.18389657139778137, "learning_rate": 3.4198657755771425e-06, "loss": 0.4583, "step": 9661 }, { "epoch": 2.445456846368008, "grad_norm": 0.1857808530330658, "learning_rate": 3.418063023315842e-06, "loss": 0.4602, "step": 9662 }, { "epoch": 2.445709946848899, "grad_norm": 0.18251685798168182, "learning_rate": 3.4162606483830272e-06, "loss": 0.4394, "step": 9663 }, { "epoch": 2.4459630473297898, "grad_norm": 0.1816311478614807, "learning_rate": 3.4144586508820233e-06, "loss": 0.4597, "step": 9664 }, { "epoch": 2.4462161478106808, "grad_norm": 0.18861760199069977, "learning_rate": 3.4126570309161357e-06, "loss": 0.4383, "step": 9665 }, { "epoch": 2.4464692482915718, "grad_norm": 0.19064950942993164, "learning_rate": 3.4108557885886463e-06, "loss": 0.4568, "step": 9666 }, { "epoch": 2.4467223487724628, "grad_norm": 0.18606320023536682, "learning_rate": 3.409054924002818e-06, "loss": 0.4614, "step": 9667 }, { "epoch": 2.4469754492533538, "grad_norm": 0.186802476644516, "learning_rate": 3.40725443726189e-06, "loss": 0.4752, "step": 9668 }, { "epoch": 2.4472285497342443, "grad_norm": 0.1860111802816391, "learning_rate": 3.40545432846908e-06, "loss": 0.4597, "step": 9669 }, { "epoch": 2.4474816502151353, "grad_norm": 0.18825821578502655, "learning_rate": 3.403654597727585e-06, "loss": 0.4495, "step": 9670 }, { "epoch": 2.4477347506960263, "grad_norm": 0.1829221546649933, "learning_rate": 3.4018552451405797e-06, "loss": 0.4643, "step": 9671 }, { "epoch": 2.4479878511769173, "grad_norm": 0.1839562952518463, "learning_rate": 3.400056270811216e-06, "loss": 0.4397, "step": 9672 }, { "epoch": 2.4482409516578083, "grad_norm": 0.18507002294063568, "learning_rate": 3.3982576748426264e-06, "loss": 0.4792, "step": 9673 }, { "epoch": 2.448494052138699, "grad_norm": 0.18483738601207733, "learning_rate": 3.3964594573379206e-06, "loss": 0.4401, "step": 9674 }, { "epoch": 2.44874715261959, "grad_norm": 0.1768723428249359, "learning_rate": 3.394661618400187e-06, "loss": 0.4374, "step": 9675 }, { "epoch": 2.449000253100481, "grad_norm": 0.18203230202198029, "learning_rate": 3.3928641581324907e-06, "loss": 0.4593, "step": 9676 }, { "epoch": 2.449253353581372, "grad_norm": 0.18485267460346222, "learning_rate": 3.3910670766378772e-06, "loss": 0.4394, "step": 9677 }, { "epoch": 2.449506454062263, "grad_norm": 0.18622247874736786, "learning_rate": 3.389270374019369e-06, "loss": 0.4408, "step": 9678 }, { "epoch": 2.449759554543154, "grad_norm": 0.18401874601840973, "learning_rate": 3.3874740503799666e-06, "loss": 0.4419, "step": 9679 }, { "epoch": 2.4500126550240444, "grad_norm": 0.21295438706874847, "learning_rate": 3.385678105822651e-06, "loss": 0.4738, "step": 9680 }, { "epoch": 2.4502657555049354, "grad_norm": 0.1843212991952896, "learning_rate": 3.3838825404503785e-06, "loss": 0.4641, "step": 9681 }, { "epoch": 2.4505188559858264, "grad_norm": 0.18124623596668243, "learning_rate": 3.382087354366086e-06, "loss": 0.4434, "step": 9682 }, { "epoch": 2.4507719564667174, "grad_norm": 0.1888759285211563, "learning_rate": 3.38029254767269e-06, "loss": 0.4495, "step": 9683 }, { "epoch": 2.451025056947608, "grad_norm": 0.1861332654953003, "learning_rate": 3.378498120473077e-06, "loss": 0.4168, "step": 9684 }, { "epoch": 2.451278157428499, "grad_norm": 0.18031662702560425, "learning_rate": 3.3767040728701186e-06, "loss": 0.4516, "step": 9685 }, { "epoch": 2.45153125790939, "grad_norm": 0.18578703701496124, "learning_rate": 3.374910404966668e-06, "loss": 0.456, "step": 9686 }, { "epoch": 2.451784358390281, "grad_norm": 0.1846402883529663, "learning_rate": 3.3731171168655506e-06, "loss": 0.462, "step": 9687 }, { "epoch": 2.452037458871172, "grad_norm": 0.2116377055644989, "learning_rate": 3.371324208669573e-06, "loss": 0.4554, "step": 9688 }, { "epoch": 2.452290559352063, "grad_norm": 0.18312495946884155, "learning_rate": 3.3695316804815194e-06, "loss": 0.4388, "step": 9689 }, { "epoch": 2.4525436598329535, "grad_norm": 0.1861317753791809, "learning_rate": 3.367739532404147e-06, "loss": 0.4574, "step": 9690 }, { "epoch": 2.4527967603138445, "grad_norm": 0.1861131191253662, "learning_rate": 3.365947764540195e-06, "loss": 0.437, "step": 9691 }, { "epoch": 2.4530498607947355, "grad_norm": 0.19199474155902863, "learning_rate": 3.364156376992389e-06, "loss": 0.4702, "step": 9692 }, { "epoch": 2.4533029612756265, "grad_norm": 0.18382039666175842, "learning_rate": 3.3623653698634216e-06, "loss": 0.4506, "step": 9693 }, { "epoch": 2.4535560617565175, "grad_norm": 0.18418270349502563, "learning_rate": 3.360574743255971e-06, "loss": 0.4558, "step": 9694 }, { "epoch": 2.453809162237408, "grad_norm": 0.183508038520813, "learning_rate": 3.3587844972726823e-06, "loss": 0.4178, "step": 9695 }, { "epoch": 2.454062262718299, "grad_norm": 0.18013517558574677, "learning_rate": 3.3569946320161917e-06, "loss": 0.4346, "step": 9696 }, { "epoch": 2.45431536319919, "grad_norm": 0.18426954746246338, "learning_rate": 3.3552051475891066e-06, "loss": 0.452, "step": 9697 }, { "epoch": 2.454568463680081, "grad_norm": 0.17910541594028473, "learning_rate": 3.3534160440940123e-06, "loss": 0.4435, "step": 9698 }, { "epoch": 2.454821564160972, "grad_norm": 0.18865260481834412, "learning_rate": 3.3516273216334804e-06, "loss": 0.4681, "step": 9699 }, { "epoch": 2.4550746646418626, "grad_norm": 0.17858724296092987, "learning_rate": 3.349838980310055e-06, "loss": 0.4425, "step": 9700 }, { "epoch": 2.4553277651227536, "grad_norm": 0.1910606324672699, "learning_rate": 3.3480510202262505e-06, "loss": 0.4695, "step": 9701 }, { "epoch": 2.4555808656036446, "grad_norm": 0.1797039657831192, "learning_rate": 3.346263441484571e-06, "loss": 0.4164, "step": 9702 }, { "epoch": 2.4558339660845356, "grad_norm": 0.1863757073879242, "learning_rate": 3.3444762441874934e-06, "loss": 0.4619, "step": 9703 }, { "epoch": 2.4560870665654266, "grad_norm": 0.1804850995540619, "learning_rate": 3.342689428437472e-06, "loss": 0.4238, "step": 9704 }, { "epoch": 2.456340167046317, "grad_norm": 0.183669313788414, "learning_rate": 3.3409029943369507e-06, "loss": 0.4509, "step": 9705 }, { "epoch": 2.456593267527208, "grad_norm": 0.18709595501422882, "learning_rate": 3.339116941988332e-06, "loss": 0.4605, "step": 9706 }, { "epoch": 2.456846368008099, "grad_norm": 0.2203434258699417, "learning_rate": 3.3373312714940085e-06, "loss": 0.4745, "step": 9707 }, { "epoch": 2.45709946848899, "grad_norm": 0.18127106130123138, "learning_rate": 3.335545982956352e-06, "loss": 0.4519, "step": 9708 }, { "epoch": 2.457352568969881, "grad_norm": 0.17491835355758667, "learning_rate": 3.333761076477705e-06, "loss": 0.4372, "step": 9709 }, { "epoch": 2.457605669450772, "grad_norm": 0.18511024117469788, "learning_rate": 3.3319765521603964e-06, "loss": 0.4393, "step": 9710 }, { "epoch": 2.4578587699316627, "grad_norm": 0.18701249361038208, "learning_rate": 3.3301924101067275e-06, "loss": 0.4612, "step": 9711 }, { "epoch": 2.4581118704125537, "grad_norm": 0.18671417236328125, "learning_rate": 3.3284086504189794e-06, "loss": 0.4337, "step": 9712 }, { "epoch": 2.4583649708934447, "grad_norm": 0.1861875057220459, "learning_rate": 3.3266252731994108e-06, "loss": 0.442, "step": 9713 }, { "epoch": 2.4586180713743357, "grad_norm": 0.18268518149852753, "learning_rate": 3.32484227855026e-06, "loss": 0.4501, "step": 9714 }, { "epoch": 2.4588711718552263, "grad_norm": 0.18294842541217804, "learning_rate": 3.323059666573741e-06, "loss": 0.4596, "step": 9715 }, { "epoch": 2.4591242723361173, "grad_norm": 0.1810551881790161, "learning_rate": 3.321277437372048e-06, "loss": 0.4371, "step": 9716 }, { "epoch": 2.4593773728170083, "grad_norm": 0.17945487797260284, "learning_rate": 3.3194955910473525e-06, "loss": 0.4792, "step": 9717 }, { "epoch": 2.4596304732978993, "grad_norm": 0.18614359200000763, "learning_rate": 3.317714127701804e-06, "loss": 0.4602, "step": 9718 }, { "epoch": 2.4598835737787903, "grad_norm": 0.19939681887626648, "learning_rate": 3.3159330474375294e-06, "loss": 0.4688, "step": 9719 }, { "epoch": 2.4601366742596813, "grad_norm": 0.1857181191444397, "learning_rate": 3.3141523503566353e-06, "loss": 0.4387, "step": 9720 }, { "epoch": 2.460389774740572, "grad_norm": 0.18556882441043854, "learning_rate": 3.3123720365612043e-06, "loss": 0.4554, "step": 9721 }, { "epoch": 2.460642875221463, "grad_norm": 0.18572351336479187, "learning_rate": 3.3105921061532986e-06, "loss": 0.4612, "step": 9722 }, { "epoch": 2.460895975702354, "grad_norm": 0.18346506357192993, "learning_rate": 3.3088125592349575e-06, "loss": 0.4419, "step": 9723 }, { "epoch": 2.461149076183245, "grad_norm": 0.18784499168395996, "learning_rate": 3.307033395908199e-06, "loss": 0.4622, "step": 9724 }, { "epoch": 2.461402176664136, "grad_norm": 0.18471379578113556, "learning_rate": 3.3052546162750177e-06, "loss": 0.4679, "step": 9725 }, { "epoch": 2.4616552771450264, "grad_norm": 0.1873440146446228, "learning_rate": 3.3034762204373883e-06, "loss": 0.4753, "step": 9726 }, { "epoch": 2.4619083776259174, "grad_norm": 0.18446621298789978, "learning_rate": 3.301698208497266e-06, "loss": 0.445, "step": 9727 }, { "epoch": 2.4621614781068084, "grad_norm": 0.18203487992286682, "learning_rate": 3.2999205805565727e-06, "loss": 0.4577, "step": 9728 }, { "epoch": 2.4624145785876994, "grad_norm": 0.1873561441898346, "learning_rate": 3.298143336717218e-06, "loss": 0.4534, "step": 9729 }, { "epoch": 2.4626676790685904, "grad_norm": 0.18738245964050293, "learning_rate": 3.296366477081091e-06, "loss": 0.4204, "step": 9730 }, { "epoch": 2.462920779549481, "grad_norm": 0.19791758060455322, "learning_rate": 3.2945900017500555e-06, "loss": 0.4431, "step": 9731 }, { "epoch": 2.463173880030372, "grad_norm": 0.18631838262081146, "learning_rate": 3.2928139108259506e-06, "loss": 0.442, "step": 9732 }, { "epoch": 2.463426980511263, "grad_norm": 0.18606199324131012, "learning_rate": 3.2910382044105992e-06, "loss": 0.4496, "step": 9733 }, { "epoch": 2.463680080992154, "grad_norm": 0.19373813271522522, "learning_rate": 3.2892628826057935e-06, "loss": 0.4478, "step": 9734 }, { "epoch": 2.463933181473045, "grad_norm": 0.18589748442173004, "learning_rate": 3.2874879455133092e-06, "loss": 0.4512, "step": 9735 }, { "epoch": 2.4641862819539355, "grad_norm": 0.1852455586194992, "learning_rate": 3.285713393234905e-06, "loss": 0.4653, "step": 9736 }, { "epoch": 2.4644393824348265, "grad_norm": 0.17708276212215424, "learning_rate": 3.283939225872309e-06, "loss": 0.4372, "step": 9737 }, { "epoch": 2.4646924829157175, "grad_norm": 0.20664365589618683, "learning_rate": 3.282165443527231e-06, "loss": 0.4482, "step": 9738 }, { "epoch": 2.4649455833966085, "grad_norm": 0.18265767395496368, "learning_rate": 3.2803920463013605e-06, "loss": 0.4255, "step": 9739 }, { "epoch": 2.4651986838774995, "grad_norm": 0.21004746854305267, "learning_rate": 3.2786190342963574e-06, "loss": 0.4659, "step": 9740 }, { "epoch": 2.4654517843583905, "grad_norm": 0.19216899573802948, "learning_rate": 3.2768464076138672e-06, "loss": 0.45, "step": 9741 }, { "epoch": 2.465704884839281, "grad_norm": 0.1817542314529419, "learning_rate": 3.2750741663555085e-06, "loss": 0.4432, "step": 9742 }, { "epoch": 2.465957985320172, "grad_norm": 0.19607919454574585, "learning_rate": 3.2733023106228858e-06, "loss": 0.4635, "step": 9743 }, { "epoch": 2.466211085801063, "grad_norm": 0.19682277739048004, "learning_rate": 3.271530840517575e-06, "loss": 0.4723, "step": 9744 }, { "epoch": 2.466464186281954, "grad_norm": 0.17888541519641876, "learning_rate": 3.2697597561411256e-06, "loss": 0.4471, "step": 9745 }, { "epoch": 2.4667172867628446, "grad_norm": 0.18548081815242767, "learning_rate": 3.2679890575950723e-06, "loss": 0.4412, "step": 9746 }, { "epoch": 2.4669703872437356, "grad_norm": 0.19930952787399292, "learning_rate": 3.266218744980927e-06, "loss": 0.4352, "step": 9747 }, { "epoch": 2.4672234877246266, "grad_norm": 0.18074792623519897, "learning_rate": 3.2644488184001768e-06, "loss": 0.4162, "step": 9748 }, { "epoch": 2.4674765882055176, "grad_norm": 0.18753167986869812, "learning_rate": 3.262679277954285e-06, "loss": 0.4472, "step": 9749 }, { "epoch": 2.4677296886864086, "grad_norm": 0.18739210069179535, "learning_rate": 3.260910123744705e-06, "loss": 0.4633, "step": 9750 }, { "epoch": 2.4679827891672996, "grad_norm": 0.17698386311531067, "learning_rate": 3.2591413558728493e-06, "loss": 0.4475, "step": 9751 }, { "epoch": 2.46823588964819, "grad_norm": 0.1828993558883667, "learning_rate": 3.2573729744401195e-06, "loss": 0.4347, "step": 9752 }, { "epoch": 2.468488990129081, "grad_norm": 0.1945481151342392, "learning_rate": 3.2556049795478962e-06, "loss": 0.4328, "step": 9753 }, { "epoch": 2.468742090609972, "grad_norm": 0.18375836312770844, "learning_rate": 3.253837371297531e-06, "loss": 0.4361, "step": 9754 }, { "epoch": 2.468995191090863, "grad_norm": 0.17511983215808868, "learning_rate": 3.2520701497903607e-06, "loss": 0.4695, "step": 9755 }, { "epoch": 2.4692482915717537, "grad_norm": 0.1802387237548828, "learning_rate": 3.2503033151276943e-06, "loss": 0.399, "step": 9756 }, { "epoch": 2.4695013920526447, "grad_norm": 0.19294790923595428, "learning_rate": 3.248536867410821e-06, "loss": 0.4554, "step": 9757 }, { "epoch": 2.4697544925335357, "grad_norm": 0.1887184977531433, "learning_rate": 3.2467708067410075e-06, "loss": 0.4351, "step": 9758 }, { "epoch": 2.4700075930144267, "grad_norm": 0.18206898868083954, "learning_rate": 3.2450051332194977e-06, "loss": 0.4471, "step": 9759 }, { "epoch": 2.4702606934953177, "grad_norm": 0.18090690672397614, "learning_rate": 3.2432398469475157e-06, "loss": 0.418, "step": 9760 }, { "epoch": 2.4705137939762087, "grad_norm": 0.18949437141418457, "learning_rate": 3.2414749480262597e-06, "loss": 0.4482, "step": 9761 }, { "epoch": 2.4707668944570993, "grad_norm": 0.18144941329956055, "learning_rate": 3.2397104365569087e-06, "loss": 0.447, "step": 9762 }, { "epoch": 2.4710199949379903, "grad_norm": 0.186555877327919, "learning_rate": 3.2379463126406185e-06, "loss": 0.4803, "step": 9763 }, { "epoch": 2.4712730954188813, "grad_norm": 0.18178676068782806, "learning_rate": 3.2361825763785224e-06, "loss": 0.4273, "step": 9764 }, { "epoch": 2.4715261958997723, "grad_norm": 0.18077288568019867, "learning_rate": 3.234419227871731e-06, "loss": 0.475, "step": 9765 }, { "epoch": 2.4717792963806633, "grad_norm": 0.18698133528232574, "learning_rate": 3.2326562672213346e-06, "loss": 0.4517, "step": 9766 }, { "epoch": 2.472032396861554, "grad_norm": 0.18401643633842468, "learning_rate": 3.2308936945283987e-06, "loss": 0.4305, "step": 9767 }, { "epoch": 2.472285497342445, "grad_norm": 0.18152743577957153, "learning_rate": 3.229131509893968e-06, "loss": 0.4461, "step": 9768 }, { "epoch": 2.472538597823336, "grad_norm": 0.18482622504234314, "learning_rate": 3.2273697134190653e-06, "loss": 0.4456, "step": 9769 }, { "epoch": 2.472791698304227, "grad_norm": 0.18731287121772766, "learning_rate": 3.2256083052046906e-06, "loss": 0.4611, "step": 9770 }, { "epoch": 2.473044798785118, "grad_norm": 0.1828138679265976, "learning_rate": 3.223847285351822e-06, "loss": 0.4506, "step": 9771 }, { "epoch": 2.473297899266009, "grad_norm": 0.18637590110301971, "learning_rate": 3.222086653961417e-06, "loss": 0.435, "step": 9772 }, { "epoch": 2.4735509997468994, "grad_norm": 0.19461746513843536, "learning_rate": 3.2203264111344014e-06, "loss": 0.4588, "step": 9773 }, { "epoch": 2.4738041002277904, "grad_norm": 0.18563808500766754, "learning_rate": 3.218566556971694e-06, "loss": 0.4452, "step": 9774 }, { "epoch": 2.4740572007086814, "grad_norm": 0.17783161997795105, "learning_rate": 3.21680709157418e-06, "loss": 0.4318, "step": 9775 }, { "epoch": 2.4743103011895724, "grad_norm": 0.18855345249176025, "learning_rate": 3.2150480150427277e-06, "loss": 0.4623, "step": 9776 }, { "epoch": 2.474563401670463, "grad_norm": 0.18631193041801453, "learning_rate": 3.213289327478183e-06, "loss": 0.4408, "step": 9777 }, { "epoch": 2.474816502151354, "grad_norm": 0.18600304424762726, "learning_rate": 3.2115310289813618e-06, "loss": 0.4434, "step": 9778 }, { "epoch": 2.475069602632245, "grad_norm": 0.18601694703102112, "learning_rate": 3.2097731196530655e-06, "loss": 0.4487, "step": 9779 }, { "epoch": 2.475322703113136, "grad_norm": 0.1894472986459732, "learning_rate": 3.2080155995940708e-06, "loss": 0.4226, "step": 9780 }, { "epoch": 2.475575803594027, "grad_norm": 0.18601754307746887, "learning_rate": 3.2062584689051367e-06, "loss": 0.4742, "step": 9781 }, { "epoch": 2.475828904074918, "grad_norm": 0.18280155956745148, "learning_rate": 3.2045017276869937e-06, "loss": 0.4785, "step": 9782 }, { "epoch": 2.4760820045558085, "grad_norm": 0.18432001769542694, "learning_rate": 3.202745376040356e-06, "loss": 0.4697, "step": 9783 }, { "epoch": 2.4763351050366995, "grad_norm": 0.1822393536567688, "learning_rate": 3.2009894140659024e-06, "loss": 0.4414, "step": 9784 }, { "epoch": 2.4765882055175905, "grad_norm": 0.18995630741119385, "learning_rate": 3.1992338418643044e-06, "loss": 0.4829, "step": 9785 }, { "epoch": 2.4768413059984815, "grad_norm": 0.1888495236635208, "learning_rate": 3.1974786595362007e-06, "loss": 0.4382, "step": 9786 }, { "epoch": 2.477094406479372, "grad_norm": 0.1742294579744339, "learning_rate": 3.195723867182219e-06, "loss": 0.4281, "step": 9787 }, { "epoch": 2.477347506960263, "grad_norm": 0.18650834262371063, "learning_rate": 3.1939694649029575e-06, "loss": 0.4545, "step": 9788 }, { "epoch": 2.477600607441154, "grad_norm": 0.19121144711971283, "learning_rate": 3.1922154527989857e-06, "loss": 0.4595, "step": 9789 }, { "epoch": 2.477853707922045, "grad_norm": 0.18312130868434906, "learning_rate": 3.190461830970861e-06, "loss": 0.4341, "step": 9790 }, { "epoch": 2.478106808402936, "grad_norm": 0.1785571128129959, "learning_rate": 3.188708599519116e-06, "loss": 0.438, "step": 9791 }, { "epoch": 2.478359908883827, "grad_norm": 0.18268080055713654, "learning_rate": 3.186955758544258e-06, "loss": 0.4335, "step": 9792 }, { "epoch": 2.4786130093647176, "grad_norm": 0.18247269093990326, "learning_rate": 3.1852033081467702e-06, "loss": 0.4441, "step": 9793 }, { "epoch": 2.4788661098456086, "grad_norm": 0.18360787630081177, "learning_rate": 3.1834512484271285e-06, "loss": 0.4545, "step": 9794 }, { "epoch": 2.4791192103264996, "grad_norm": 0.1833067387342453, "learning_rate": 3.181699579485763e-06, "loss": 0.4459, "step": 9795 }, { "epoch": 2.4793723108073906, "grad_norm": 0.1857370138168335, "learning_rate": 3.179948301423098e-06, "loss": 0.4482, "step": 9796 }, { "epoch": 2.4796254112882816, "grad_norm": 0.1901809573173523, "learning_rate": 3.17819741433953e-06, "loss": 0.4582, "step": 9797 }, { "epoch": 2.479878511769172, "grad_norm": 0.1967715620994568, "learning_rate": 3.176446918335433e-06, "loss": 0.4512, "step": 9798 }, { "epoch": 2.480131612250063, "grad_norm": 0.19134455919265747, "learning_rate": 3.1746968135111565e-06, "loss": 0.4448, "step": 9799 }, { "epoch": 2.480384712730954, "grad_norm": 0.189021497964859, "learning_rate": 3.1729470999670397e-06, "loss": 0.4501, "step": 9800 }, { "epoch": 2.480637813211845, "grad_norm": 0.18744321167469025, "learning_rate": 3.171197777803382e-06, "loss": 0.4622, "step": 9801 }, { "epoch": 2.480890913692736, "grad_norm": 0.18610751628875732, "learning_rate": 3.1694488471204688e-06, "loss": 0.4354, "step": 9802 }, { "epoch": 2.481144014173627, "grad_norm": 0.18708084523677826, "learning_rate": 3.167700308018564e-06, "loss": 0.4752, "step": 9803 }, { "epoch": 2.4813971146545177, "grad_norm": 0.18813864886760712, "learning_rate": 3.1659521605979082e-06, "loss": 0.4439, "step": 9804 }, { "epoch": 2.4816502151354087, "grad_norm": 0.19151608645915985, "learning_rate": 3.1642044049587185e-06, "loss": 0.4652, "step": 9805 }, { "epoch": 2.4819033156162997, "grad_norm": 0.30630072951316833, "learning_rate": 3.162457041201189e-06, "loss": 0.4337, "step": 9806 }, { "epoch": 2.4821564160971907, "grad_norm": 0.1815257966518402, "learning_rate": 3.1607100694254944e-06, "loss": 0.4621, "step": 9807 }, { "epoch": 2.4824095165780813, "grad_norm": 0.1898999810218811, "learning_rate": 3.1589634897317833e-06, "loss": 0.4529, "step": 9808 }, { "epoch": 2.4826626170589723, "grad_norm": 0.18546715378761292, "learning_rate": 3.1572173022201836e-06, "loss": 0.4368, "step": 9809 }, { "epoch": 2.4829157175398633, "grad_norm": 0.18431618809700012, "learning_rate": 3.1554715069908006e-06, "loss": 0.4343, "step": 9810 }, { "epoch": 2.4831688180207543, "grad_norm": 0.18628107011318207, "learning_rate": 3.153726104143717e-06, "loss": 0.4237, "step": 9811 }, { "epoch": 2.4834219185016453, "grad_norm": 0.1831175535917282, "learning_rate": 3.151981093778994e-06, "loss": 0.4321, "step": 9812 }, { "epoch": 2.4836750189825363, "grad_norm": 0.1933259218931198, "learning_rate": 3.1502364759966675e-06, "loss": 0.4524, "step": 9813 }, { "epoch": 2.483928119463427, "grad_norm": 0.1835838109254837, "learning_rate": 3.148492250896755e-06, "loss": 0.4641, "step": 9814 }, { "epoch": 2.484181219944318, "grad_norm": 0.18070180714130402, "learning_rate": 3.1467484185792473e-06, "loss": 0.4148, "step": 9815 }, { "epoch": 2.484434320425209, "grad_norm": 0.19652725756168365, "learning_rate": 3.1450049791441183e-06, "loss": 0.4646, "step": 9816 }, { "epoch": 2.4846874209061, "grad_norm": 0.18477898836135864, "learning_rate": 3.1432619326913072e-06, "loss": 0.4576, "step": 9817 }, { "epoch": 2.4849405213869904, "grad_norm": 0.18408183753490448, "learning_rate": 3.1415192793207473e-06, "loss": 0.4242, "step": 9818 }, { "epoch": 2.4851936218678814, "grad_norm": 0.1834844946861267, "learning_rate": 3.1397770191323397e-06, "loss": 0.447, "step": 9819 }, { "epoch": 2.4854467223487724, "grad_norm": 0.18416836857795715, "learning_rate": 3.138035152225962e-06, "loss": 0.4358, "step": 9820 }, { "epoch": 2.4856998228296634, "grad_norm": 0.18880175054073334, "learning_rate": 3.136293678701473e-06, "loss": 0.4689, "step": 9821 }, { "epoch": 2.4859529233105544, "grad_norm": 0.18869760632514954, "learning_rate": 3.1345525986587123e-06, "loss": 0.459, "step": 9822 }, { "epoch": 2.4862060237914454, "grad_norm": 0.1863379180431366, "learning_rate": 3.1328119121974844e-06, "loss": 0.4368, "step": 9823 }, { "epoch": 2.486459124272336, "grad_norm": 0.18433062732219696, "learning_rate": 3.1310716194175784e-06, "loss": 0.454, "step": 9824 }, { "epoch": 2.486712224753227, "grad_norm": 0.1869802623987198, "learning_rate": 3.1293317204187703e-06, "loss": 0.4694, "step": 9825 }, { "epoch": 2.486965325234118, "grad_norm": 0.17689451575279236, "learning_rate": 3.1275922153007985e-06, "loss": 0.4476, "step": 9826 }, { "epoch": 2.487218425715009, "grad_norm": 0.1803857833147049, "learning_rate": 3.125853104163392e-06, "loss": 0.4505, "step": 9827 }, { "epoch": 2.4874715261959, "grad_norm": 0.17905639111995697, "learning_rate": 3.124114387106241e-06, "loss": 0.4506, "step": 9828 }, { "epoch": 2.4877246266767905, "grad_norm": 0.18597295880317688, "learning_rate": 3.1223760642290266e-06, "loss": 0.4438, "step": 9829 }, { "epoch": 2.4879777271576815, "grad_norm": 0.18978239595890045, "learning_rate": 3.120638135631401e-06, "loss": 0.4646, "step": 9830 }, { "epoch": 2.4882308276385725, "grad_norm": 0.18418936431407928, "learning_rate": 3.118900601413001e-06, "loss": 0.4649, "step": 9831 }, { "epoch": 2.4884839281194635, "grad_norm": 0.18626517057418823, "learning_rate": 3.1171634616734334e-06, "loss": 0.4522, "step": 9832 }, { "epoch": 2.4887370286003545, "grad_norm": 0.18436528742313385, "learning_rate": 3.1154267165122874e-06, "loss": 0.4161, "step": 9833 }, { "epoch": 2.488990129081245, "grad_norm": 0.1887761652469635, "learning_rate": 3.1136903660291205e-06, "loss": 0.4629, "step": 9834 }, { "epoch": 2.489243229562136, "grad_norm": 0.1884230375289917, "learning_rate": 3.1119544103234777e-06, "loss": 0.4614, "step": 9835 }, { "epoch": 2.489496330043027, "grad_norm": 0.18079005181789398, "learning_rate": 3.1102188494948773e-06, "loss": 0.4402, "step": 9836 }, { "epoch": 2.489749430523918, "grad_norm": 0.19314855337142944, "learning_rate": 3.1084836836428114e-06, "loss": 0.4451, "step": 9837 }, { "epoch": 2.490002531004809, "grad_norm": 0.1882188320159912, "learning_rate": 3.106748912866764e-06, "loss": 0.4639, "step": 9838 }, { "epoch": 2.4902556314856996, "grad_norm": 0.18302544951438904, "learning_rate": 3.1050145372661767e-06, "loss": 0.4298, "step": 9839 }, { "epoch": 2.4905087319665906, "grad_norm": 0.19044649600982666, "learning_rate": 3.10328055694048e-06, "loss": 0.45, "step": 9840 }, { "epoch": 2.4907618324474816, "grad_norm": 0.19069914519786835, "learning_rate": 3.1015469719890788e-06, "loss": 0.4425, "step": 9841 }, { "epoch": 2.4910149329283726, "grad_norm": 0.17773745954036713, "learning_rate": 3.099813782511356e-06, "loss": 0.4257, "step": 9842 }, { "epoch": 2.4912680334092636, "grad_norm": 0.2107766568660736, "learning_rate": 3.0980809886066697e-06, "loss": 0.4782, "step": 9843 }, { "epoch": 2.4915211338901546, "grad_norm": 0.18509654700756073, "learning_rate": 3.0963485903743663e-06, "loss": 0.4312, "step": 9844 }, { "epoch": 2.491774234371045, "grad_norm": 0.1830548495054245, "learning_rate": 3.094616587913749e-06, "loss": 0.4191, "step": 9845 }, { "epoch": 2.492027334851936, "grad_norm": 0.18965588510036469, "learning_rate": 3.092884981324117e-06, "loss": 0.4567, "step": 9846 }, { "epoch": 2.492280435332827, "grad_norm": 0.18627087771892548, "learning_rate": 3.0911537707047356e-06, "loss": 0.4539, "step": 9847 }, { "epoch": 2.492533535813718, "grad_norm": 0.18524600565433502, "learning_rate": 3.089422956154854e-06, "loss": 0.4672, "step": 9848 }, { "epoch": 2.4927866362946087, "grad_norm": 0.1828841269016266, "learning_rate": 3.0876925377736956e-06, "loss": 0.4614, "step": 9849 }, { "epoch": 2.4930397367754997, "grad_norm": 0.1850241869688034, "learning_rate": 3.08596251566046e-06, "loss": 0.4649, "step": 9850 }, { "epoch": 2.4932928372563907, "grad_norm": 0.19191959500312805, "learning_rate": 3.0842328899143283e-06, "loss": 0.4313, "step": 9851 }, { "epoch": 2.4935459377372817, "grad_norm": 0.188773050904274, "learning_rate": 3.082503660634455e-06, "loss": 0.4389, "step": 9852 }, { "epoch": 2.4937990382181727, "grad_norm": 0.17980480194091797, "learning_rate": 3.0807748279199723e-06, "loss": 0.4486, "step": 9853 }, { "epoch": 2.4940521386990637, "grad_norm": 0.1870751678943634, "learning_rate": 3.079046391869992e-06, "loss": 0.4382, "step": 9854 }, { "epoch": 2.4943052391799543, "grad_norm": 0.18761655688285828, "learning_rate": 3.0773183525836004e-06, "loss": 0.4317, "step": 9855 }, { "epoch": 2.4945583396608453, "grad_norm": 0.20503035187721252, "learning_rate": 3.0755907101598637e-06, "loss": 0.468, "step": 9856 }, { "epoch": 2.4948114401417363, "grad_norm": 0.1887664794921875, "learning_rate": 3.0738634646978225e-06, "loss": 0.4468, "step": 9857 }, { "epoch": 2.4950645406226273, "grad_norm": 0.18258114159107208, "learning_rate": 3.072136616296497e-06, "loss": 0.456, "step": 9858 }, { "epoch": 2.4953176411035183, "grad_norm": 0.1853690892457962, "learning_rate": 3.0704101650548846e-06, "loss": 0.4542, "step": 9859 }, { "epoch": 2.495570741584409, "grad_norm": 0.1919163167476654, "learning_rate": 3.06868411107196e-06, "loss": 0.4677, "step": 9860 }, { "epoch": 2.4958238420653, "grad_norm": 0.18200209736824036, "learning_rate": 3.066958454446667e-06, "loss": 0.4291, "step": 9861 }, { "epoch": 2.496076942546191, "grad_norm": 0.18517909944057465, "learning_rate": 3.0652331952779413e-06, "loss": 0.4644, "step": 9862 }, { "epoch": 2.496330043027082, "grad_norm": 0.1810602992773056, "learning_rate": 3.0635083336646875e-06, "loss": 0.4309, "step": 9863 }, { "epoch": 2.496583143507973, "grad_norm": 0.1886460781097412, "learning_rate": 3.0617838697057854e-06, "loss": 0.4637, "step": 9864 }, { "epoch": 2.4968362439888634, "grad_norm": 0.18465498089790344, "learning_rate": 3.0600598035000973e-06, "loss": 0.4508, "step": 9865 }, { "epoch": 2.4970893444697544, "grad_norm": 0.18890772759914398, "learning_rate": 3.0583361351464625e-06, "loss": 0.4658, "step": 9866 }, { "epoch": 2.4973424449506454, "grad_norm": 0.19306395947933197, "learning_rate": 3.0566128647436887e-06, "loss": 0.4798, "step": 9867 }, { "epoch": 2.4975955454315364, "grad_norm": 0.18355952203273773, "learning_rate": 3.054889992390567e-06, "loss": 0.4666, "step": 9868 }, { "epoch": 2.4978486459124274, "grad_norm": 0.1860881894826889, "learning_rate": 3.0531675181858746e-06, "loss": 0.4567, "step": 9869 }, { "epoch": 2.498101746393318, "grad_norm": 0.19630080461502075, "learning_rate": 3.051445442228351e-06, "loss": 0.4448, "step": 9870 }, { "epoch": 2.498354846874209, "grad_norm": 0.18540795147418976, "learning_rate": 3.049723764616721e-06, "loss": 0.4312, "step": 9871 }, { "epoch": 2.4986079473551, "grad_norm": 0.19548986852169037, "learning_rate": 3.048002485449688e-06, "loss": 0.4849, "step": 9872 }, { "epoch": 2.498861047835991, "grad_norm": 0.18266990780830383, "learning_rate": 3.0462816048259225e-06, "loss": 0.4418, "step": 9873 }, { "epoch": 2.499114148316882, "grad_norm": 0.18920131027698517, "learning_rate": 3.0445611228440787e-06, "loss": 0.453, "step": 9874 }, { "epoch": 2.499367248797773, "grad_norm": 0.18696339428424835, "learning_rate": 3.042841039602794e-06, "loss": 0.4634, "step": 9875 }, { "epoch": 2.4996203492786635, "grad_norm": 0.1844286024570465, "learning_rate": 3.0411213552006744e-06, "loss": 0.444, "step": 9876 }, { "epoch": 2.4998734497595545, "grad_norm": 0.17990075051784515, "learning_rate": 3.0394020697363103e-06, "loss": 0.4447, "step": 9877 }, { "epoch": 2.5001265502404455, "grad_norm": 0.18288540840148926, "learning_rate": 3.0376831833082576e-06, "loss": 0.4356, "step": 9878 }, { "epoch": 2.5003796507213365, "grad_norm": 0.18815818428993225, "learning_rate": 3.0359646960150578e-06, "loss": 0.451, "step": 9879 }, { "epoch": 2.500632751202227, "grad_norm": 0.19086502492427826, "learning_rate": 3.034246607955231e-06, "loss": 0.4415, "step": 9880 }, { "epoch": 2.500885851683118, "grad_norm": 0.1823539435863495, "learning_rate": 3.0325289192272667e-06, "loss": 0.4403, "step": 9881 }, { "epoch": 2.501138952164009, "grad_norm": 0.18392543494701385, "learning_rate": 3.030811629929642e-06, "loss": 0.467, "step": 9882 }, { "epoch": 2.5013920526449, "grad_norm": 0.18447667360305786, "learning_rate": 3.0290947401608074e-06, "loss": 0.4671, "step": 9883 }, { "epoch": 2.501645153125791, "grad_norm": 0.1881193369626999, "learning_rate": 3.0273782500191807e-06, "loss": 0.4415, "step": 9884 }, { "epoch": 2.501898253606682, "grad_norm": 0.18418070673942566, "learning_rate": 3.0256621596031677e-06, "loss": 0.455, "step": 9885 }, { "epoch": 2.5021513540875726, "grad_norm": 0.18929621577262878, "learning_rate": 3.0239464690111498e-06, "loss": 0.4602, "step": 9886 }, { "epoch": 2.5024044545684636, "grad_norm": 0.18872860074043274, "learning_rate": 3.0222311783414825e-06, "loss": 0.4865, "step": 9887 }, { "epoch": 2.5026575550493546, "grad_norm": 0.18272048234939575, "learning_rate": 3.0205162876925e-06, "loss": 0.4219, "step": 9888 }, { "epoch": 2.5029106555302456, "grad_norm": 0.18917116522789001, "learning_rate": 3.0188017971625137e-06, "loss": 0.4404, "step": 9889 }, { "epoch": 2.503163756011136, "grad_norm": 0.18638162314891815, "learning_rate": 3.0170877068498116e-06, "loss": 0.4871, "step": 9890 }, { "epoch": 2.503416856492027, "grad_norm": 0.18685835599899292, "learning_rate": 3.015374016852659e-06, "loss": 0.4304, "step": 9891 }, { "epoch": 2.503669956972918, "grad_norm": 0.19647647440433502, "learning_rate": 3.0136607272692976e-06, "loss": 0.4284, "step": 9892 }, { "epoch": 2.503923057453809, "grad_norm": 0.1836775690317154, "learning_rate": 3.0119478381979474e-06, "loss": 0.445, "step": 9893 }, { "epoch": 2.5041761579347, "grad_norm": 0.18109674751758575, "learning_rate": 3.010235349736804e-06, "loss": 0.4326, "step": 9894 }, { "epoch": 2.504429258415591, "grad_norm": 0.18217229843139648, "learning_rate": 3.008523261984041e-06, "loss": 0.4411, "step": 9895 }, { "epoch": 2.504682358896482, "grad_norm": 0.18607278168201447, "learning_rate": 3.006811575037809e-06, "loss": 0.4461, "step": 9896 }, { "epoch": 2.5049354593773727, "grad_norm": 0.18706995248794556, "learning_rate": 3.0051002889962344e-06, "loss": 0.4729, "step": 9897 }, { "epoch": 2.5051885598582637, "grad_norm": 0.18636251986026764, "learning_rate": 3.003389403957423e-06, "loss": 0.4451, "step": 9898 }, { "epoch": 2.5054416603391547, "grad_norm": 0.189053475856781, "learning_rate": 3.0016789200194553e-06, "loss": 0.4486, "step": 9899 }, { "epoch": 2.5056947608200457, "grad_norm": 0.19097672402858734, "learning_rate": 2.9999688372803904e-06, "loss": 0.4627, "step": 9900 }, { "epoch": 2.5059478613009363, "grad_norm": 0.19781705737113953, "learning_rate": 2.9982591558382623e-06, "loss": 0.4404, "step": 9901 }, { "epoch": 2.5062009617818273, "grad_norm": 0.18611939251422882, "learning_rate": 2.996549875791086e-06, "loss": 0.4339, "step": 9902 }, { "epoch": 2.5064540622627183, "grad_norm": 0.18664616346359253, "learning_rate": 2.9948409972368487e-06, "loss": 0.446, "step": 9903 }, { "epoch": 2.5067071627436093, "grad_norm": 0.18803881108760834, "learning_rate": 2.9931325202735163e-06, "loss": 0.4612, "step": 9904 }, { "epoch": 2.5069602632245003, "grad_norm": 0.1877565085887909, "learning_rate": 2.991424444999034e-06, "loss": 0.4476, "step": 9905 }, { "epoch": 2.5072133637053913, "grad_norm": 0.1817781925201416, "learning_rate": 2.989716771511322e-06, "loss": 0.4433, "step": 9906 }, { "epoch": 2.507466464186282, "grad_norm": 0.18365634977817535, "learning_rate": 2.9880094999082767e-06, "loss": 0.4373, "step": 9907 }, { "epoch": 2.507719564667173, "grad_norm": 0.18223312497138977, "learning_rate": 2.986302630287772e-06, "loss": 0.4491, "step": 9908 }, { "epoch": 2.507972665148064, "grad_norm": 0.1875292807817459, "learning_rate": 2.9845961627476595e-06, "loss": 0.4623, "step": 9909 }, { "epoch": 2.508225765628955, "grad_norm": 0.1863904893398285, "learning_rate": 2.9828900973857712e-06, "loss": 0.4478, "step": 9910 }, { "epoch": 2.5084788661098454, "grad_norm": 0.17941059172153473, "learning_rate": 2.9811844342999064e-06, "loss": 0.4426, "step": 9911 }, { "epoch": 2.5087319665907364, "grad_norm": 0.18598681688308716, "learning_rate": 2.979479173587845e-06, "loss": 0.46, "step": 9912 }, { "epoch": 2.5089850670716274, "grad_norm": 0.19004683196544647, "learning_rate": 2.9777743153473536e-06, "loss": 0.4586, "step": 9913 }, { "epoch": 2.5092381675525184, "grad_norm": 0.18440799415111542, "learning_rate": 2.976069859676165e-06, "loss": 0.4303, "step": 9914 }, { "epoch": 2.5094912680334094, "grad_norm": 0.18430593609809875, "learning_rate": 2.974365806671993e-06, "loss": 0.4852, "step": 9915 }, { "epoch": 2.5097443685143004, "grad_norm": 0.18341735005378723, "learning_rate": 2.9726621564325275e-06, "loss": 0.4596, "step": 9916 }, { "epoch": 2.509997468995191, "grad_norm": 0.18389463424682617, "learning_rate": 2.9709589090554323e-06, "loss": 0.4636, "step": 9917 }, { "epoch": 2.510250569476082, "grad_norm": 0.19123446941375732, "learning_rate": 2.9692560646383515e-06, "loss": 0.4378, "step": 9918 }, { "epoch": 2.510503669956973, "grad_norm": 0.23274357616901398, "learning_rate": 2.9675536232789036e-06, "loss": 0.4423, "step": 9919 }, { "epoch": 2.510756770437864, "grad_norm": 0.1857619434595108, "learning_rate": 2.965851585074693e-06, "loss": 0.4561, "step": 9920 }, { "epoch": 2.5110098709187545, "grad_norm": 0.1846134513616562, "learning_rate": 2.964149950123292e-06, "loss": 0.4513, "step": 9921 }, { "epoch": 2.5112629713996455, "grad_norm": 0.18035225570201874, "learning_rate": 2.962448718522247e-06, "loss": 0.4699, "step": 9922 }, { "epoch": 2.5115160718805365, "grad_norm": 0.18082153797149658, "learning_rate": 2.960747890369089e-06, "loss": 0.4491, "step": 9923 }, { "epoch": 2.5117691723614275, "grad_norm": 0.19531063735485077, "learning_rate": 2.959047465761321e-06, "loss": 0.4657, "step": 9924 }, { "epoch": 2.5120222728423185, "grad_norm": 0.18180342018604279, "learning_rate": 2.9573474447964246e-06, "loss": 0.44, "step": 9925 }, { "epoch": 2.5122753733232095, "grad_norm": 0.17777934670448303, "learning_rate": 2.9556478275718625e-06, "loss": 0.4454, "step": 9926 }, { "epoch": 2.5125284738041005, "grad_norm": 0.19637848436832428, "learning_rate": 2.9539486141850705e-06, "loss": 0.4538, "step": 9927 }, { "epoch": 2.512781574284991, "grad_norm": 0.18627901375293732, "learning_rate": 2.9522498047334557e-06, "loss": 0.4391, "step": 9928 }, { "epoch": 2.513034674765882, "grad_norm": 0.18851062655448914, "learning_rate": 2.9505513993144086e-06, "loss": 0.5023, "step": 9929 }, { "epoch": 2.513287775246773, "grad_norm": 0.18373793363571167, "learning_rate": 2.9488533980252966e-06, "loss": 0.4582, "step": 9930 }, { "epoch": 2.513540875727664, "grad_norm": 0.18420156836509705, "learning_rate": 2.947155800963463e-06, "loss": 0.4417, "step": 9931 }, { "epoch": 2.5137939762085546, "grad_norm": 0.19234129786491394, "learning_rate": 2.945458608226223e-06, "loss": 0.456, "step": 9932 }, { "epoch": 2.5140470766894456, "grad_norm": 0.18840764462947845, "learning_rate": 2.943761819910882e-06, "loss": 0.4399, "step": 9933 }, { "epoch": 2.5143001771703366, "grad_norm": 0.18655677139759064, "learning_rate": 2.942065436114705e-06, "loss": 0.4565, "step": 9934 }, { "epoch": 2.5145532776512276, "grad_norm": 0.1884969025850296, "learning_rate": 2.9403694569349452e-06, "loss": 0.459, "step": 9935 }, { "epoch": 2.5148063781321186, "grad_norm": 0.18425942957401276, "learning_rate": 2.9386738824688288e-06, "loss": 0.4615, "step": 9936 }, { "epoch": 2.5150594786130096, "grad_norm": 0.18491524457931519, "learning_rate": 2.9369787128135606e-06, "loss": 0.4526, "step": 9937 }, { "epoch": 2.5153125790939, "grad_norm": 0.19618448615074158, "learning_rate": 2.935283948066321e-06, "loss": 0.4696, "step": 9938 }, { "epoch": 2.515565679574791, "grad_norm": 0.18791699409484863, "learning_rate": 2.933589588324265e-06, "loss": 0.4693, "step": 9939 }, { "epoch": 2.515818780055682, "grad_norm": 0.18935619294643402, "learning_rate": 2.9318956336845297e-06, "loss": 0.4582, "step": 9940 }, { "epoch": 2.516071880536573, "grad_norm": 0.18320775032043457, "learning_rate": 2.9302020842442236e-06, "loss": 0.4574, "step": 9941 }, { "epoch": 2.5163249810174637, "grad_norm": 0.18784551322460175, "learning_rate": 2.9285089401004363e-06, "loss": 0.4577, "step": 9942 }, { "epoch": 2.5165780814983547, "grad_norm": 0.2972661256790161, "learning_rate": 2.92681620135023e-06, "loss": 0.4458, "step": 9943 }, { "epoch": 2.5168311819792457, "grad_norm": 0.1889571249485016, "learning_rate": 2.925123868090647e-06, "loss": 0.4472, "step": 9944 }, { "epoch": 2.5170842824601367, "grad_norm": 0.18259693682193756, "learning_rate": 2.9234319404187062e-06, "loss": 0.4522, "step": 9945 }, { "epoch": 2.5173373829410277, "grad_norm": 0.1856050342321396, "learning_rate": 2.9217404184314e-06, "loss": 0.4392, "step": 9946 }, { "epoch": 2.5175904834219187, "grad_norm": 0.18250569701194763, "learning_rate": 2.9200493022257016e-06, "loss": 0.4592, "step": 9947 }, { "epoch": 2.5178435839028093, "grad_norm": 0.18822064995765686, "learning_rate": 2.918358591898558e-06, "loss": 0.4863, "step": 9948 }, { "epoch": 2.5180966843837003, "grad_norm": 0.20214034616947174, "learning_rate": 2.9166682875468976e-06, "loss": 0.4434, "step": 9949 }, { "epoch": 2.5183497848645913, "grad_norm": 0.188872292637825, "learning_rate": 2.914978389267613e-06, "loss": 0.4534, "step": 9950 }, { "epoch": 2.5186028853454823, "grad_norm": 0.19289550185203552, "learning_rate": 2.9132888971575913e-06, "loss": 0.4666, "step": 9951 }, { "epoch": 2.518855985826373, "grad_norm": 0.18232470750808716, "learning_rate": 2.9115998113136847e-06, "loss": 0.4445, "step": 9952 }, { "epoch": 2.519109086307264, "grad_norm": 0.18542706966400146, "learning_rate": 2.9099111318327233e-06, "loss": 0.4438, "step": 9953 }, { "epoch": 2.519362186788155, "grad_norm": 0.1861705332994461, "learning_rate": 2.908222858811518e-06, "loss": 0.4406, "step": 9954 }, { "epoch": 2.519615287269046, "grad_norm": 0.18943555653095245, "learning_rate": 2.9065349923468565e-06, "loss": 0.483, "step": 9955 }, { "epoch": 2.519868387749937, "grad_norm": 0.19023063778877258, "learning_rate": 2.9048475325354897e-06, "loss": 0.4692, "step": 9956 }, { "epoch": 2.520121488230828, "grad_norm": 0.1881096512079239, "learning_rate": 2.9031604794741664e-06, "loss": 0.467, "step": 9957 }, { "epoch": 2.520374588711719, "grad_norm": 0.19024091958999634, "learning_rate": 2.9014738332595994e-06, "loss": 0.4338, "step": 9958 }, { "epoch": 2.5206276891926094, "grad_norm": 0.18127849698066711, "learning_rate": 2.899787593988479e-06, "loss": 0.4707, "step": 9959 }, { "epoch": 2.5208807896735004, "grad_norm": 0.18787279725074768, "learning_rate": 2.8981017617574793e-06, "loss": 0.4329, "step": 9960 }, { "epoch": 2.5211338901543914, "grad_norm": 0.18948616087436676, "learning_rate": 2.896416336663236e-06, "loss": 0.4563, "step": 9961 }, { "epoch": 2.521386990635282, "grad_norm": 0.18928484618663788, "learning_rate": 2.8947313188023764e-06, "loss": 0.4406, "step": 9962 }, { "epoch": 2.521640091116173, "grad_norm": 0.1859240084886551, "learning_rate": 2.893046708271494e-06, "loss": 0.4506, "step": 9963 }, { "epoch": 2.521893191597064, "grad_norm": 0.3013046979904175, "learning_rate": 2.8913625051671723e-06, "loss": 0.4328, "step": 9964 }, { "epoch": 2.522146292077955, "grad_norm": 0.1825062781572342, "learning_rate": 2.889678709585959e-06, "loss": 0.452, "step": 9965 }, { "epoch": 2.522399392558846, "grad_norm": 0.1807064712047577, "learning_rate": 2.887995321624386e-06, "loss": 0.4219, "step": 9966 }, { "epoch": 2.522652493039737, "grad_norm": 0.18429499864578247, "learning_rate": 2.886312341378952e-06, "loss": 0.4634, "step": 9967 }, { "epoch": 2.522905593520628, "grad_norm": 0.1877393126487732, "learning_rate": 2.884629768946141e-06, "loss": 0.4491, "step": 9968 }, { "epoch": 2.5231586940015185, "grad_norm": 0.19029687345027924, "learning_rate": 2.8829476044224093e-06, "loss": 0.4711, "step": 9969 }, { "epoch": 2.5234117944824095, "grad_norm": 0.1820301115512848, "learning_rate": 2.881265847904197e-06, "loss": 0.4572, "step": 9970 }, { "epoch": 2.5236648949633005, "grad_norm": 0.17800293862819672, "learning_rate": 2.879584499487916e-06, "loss": 0.4332, "step": 9971 }, { "epoch": 2.5239179954441915, "grad_norm": 0.19256213307380676, "learning_rate": 2.877903559269949e-06, "loss": 0.4437, "step": 9972 }, { "epoch": 2.524171095925082, "grad_norm": 0.1865469366312027, "learning_rate": 2.876223027346663e-06, "loss": 0.4585, "step": 9973 }, { "epoch": 2.524424196405973, "grad_norm": 0.1877376288175583, "learning_rate": 2.874542903814399e-06, "loss": 0.47, "step": 9974 }, { "epoch": 2.524677296886864, "grad_norm": 0.19018834829330444, "learning_rate": 2.872863188769477e-06, "loss": 0.4601, "step": 9975 }, { "epoch": 2.524930397367755, "grad_norm": 0.18594974279403687, "learning_rate": 2.871183882308186e-06, "loss": 0.4673, "step": 9976 }, { "epoch": 2.525183497848646, "grad_norm": 0.1856416016817093, "learning_rate": 2.8695049845268063e-06, "loss": 0.4299, "step": 9977 }, { "epoch": 2.525436598329537, "grad_norm": 0.2068381905555725, "learning_rate": 2.8678264955215783e-06, "loss": 0.4469, "step": 9978 }, { "epoch": 2.5256896988104276, "grad_norm": 0.1868816614151001, "learning_rate": 2.8661484153887288e-06, "loss": 0.461, "step": 9979 }, { "epoch": 2.5259427992913186, "grad_norm": 0.18005871772766113, "learning_rate": 2.864470744224457e-06, "loss": 0.4451, "step": 9980 }, { "epoch": 2.5261958997722096, "grad_norm": 0.18573454022407532, "learning_rate": 2.8627934821249414e-06, "loss": 0.4337, "step": 9981 }, { "epoch": 2.5264490002531006, "grad_norm": 0.17931640148162842, "learning_rate": 2.861116629186336e-06, "loss": 0.4503, "step": 9982 }, { "epoch": 2.526702100733991, "grad_norm": 0.19361227750778198, "learning_rate": 2.8594401855047716e-06, "loss": 0.4478, "step": 9983 }, { "epoch": 2.526955201214882, "grad_norm": 0.18780486285686493, "learning_rate": 2.857764151176353e-06, "loss": 0.4418, "step": 9984 }, { "epoch": 2.527208301695773, "grad_norm": 0.1875358521938324, "learning_rate": 2.856088526297166e-06, "loss": 0.4549, "step": 9985 }, { "epoch": 2.527461402176664, "grad_norm": 0.1856631636619568, "learning_rate": 2.85441331096327e-06, "loss": 0.4632, "step": 9986 }, { "epoch": 2.527714502657555, "grad_norm": 0.18293584883213043, "learning_rate": 2.852738505270701e-06, "loss": 0.4448, "step": 9987 }, { "epoch": 2.527967603138446, "grad_norm": 0.1837577223777771, "learning_rate": 2.851064109315472e-06, "loss": 0.4414, "step": 9988 }, { "epoch": 2.528220703619337, "grad_norm": 0.1888217180967331, "learning_rate": 2.8493901231935738e-06, "loss": 0.4608, "step": 9989 }, { "epoch": 2.5284738041002277, "grad_norm": 0.18560092151165009, "learning_rate": 2.8477165470009714e-06, "loss": 0.455, "step": 9990 }, { "epoch": 2.5287269045811187, "grad_norm": 0.19064217805862427, "learning_rate": 2.8460433808336072e-06, "loss": 0.4745, "step": 9991 }, { "epoch": 2.5289800050620097, "grad_norm": 1.547255039215088, "learning_rate": 2.8443706247874014e-06, "loss": 0.4543, "step": 9992 }, { "epoch": 2.5292331055429003, "grad_norm": 0.1904160976409912, "learning_rate": 2.842698278958249e-06, "loss": 0.4622, "step": 9993 }, { "epoch": 2.5294862060237913, "grad_norm": 0.18850098550319672, "learning_rate": 2.8410263434420215e-06, "loss": 0.4644, "step": 9994 }, { "epoch": 2.5297393065046823, "grad_norm": 0.18580666184425354, "learning_rate": 2.8393548183345675e-06, "loss": 0.4495, "step": 9995 }, { "epoch": 2.5299924069855733, "grad_norm": 0.1857292652130127, "learning_rate": 2.8376837037317118e-06, "loss": 0.4431, "step": 9996 }, { "epoch": 2.5302455074664643, "grad_norm": 0.20672619342803955, "learning_rate": 2.8360129997292575e-06, "loss": 0.4365, "step": 9997 }, { "epoch": 2.5304986079473553, "grad_norm": 0.18634754419326782, "learning_rate": 2.834342706422981e-06, "loss": 0.4459, "step": 9998 }, { "epoch": 2.5307517084282463, "grad_norm": 0.18319173157215118, "learning_rate": 2.83267282390864e-06, "loss": 0.4402, "step": 9999 }, { "epoch": 2.531004808909137, "grad_norm": 0.18860861659049988, "learning_rate": 2.8310033522819556e-06, "loss": 0.4589, "step": 10000 }, { "epoch": 2.531257909390028, "grad_norm": 0.18550045788288116, "learning_rate": 2.8293342916386456e-06, "loss": 0.486, "step": 10001 }, { "epoch": 2.531511009870919, "grad_norm": 0.19137351214885712, "learning_rate": 2.827665642074391e-06, "loss": 0.4535, "step": 10002 }, { "epoch": 2.53176411035181, "grad_norm": 0.18320919573307037, "learning_rate": 2.825997403684849e-06, "loss": 0.4529, "step": 10003 }, { "epoch": 2.5320172108327004, "grad_norm": 0.18326948583126068, "learning_rate": 2.8243295765656587e-06, "loss": 0.47, "step": 10004 }, { "epoch": 2.5322703113135914, "grad_norm": 0.18086472153663635, "learning_rate": 2.822662160812436e-06, "loss": 0.431, "step": 10005 }, { "epoch": 2.5325234117944824, "grad_norm": 0.18340259790420532, "learning_rate": 2.8209951565207626e-06, "loss": 0.451, "step": 10006 }, { "epoch": 2.5327765122753734, "grad_norm": 0.17993271350860596, "learning_rate": 2.819328563786207e-06, "loss": 0.4445, "step": 10007 }, { "epoch": 2.5330296127562644, "grad_norm": 0.41876769065856934, "learning_rate": 2.817662382704315e-06, "loss": 0.4592, "step": 10008 }, { "epoch": 2.5332827132371554, "grad_norm": 0.18930912017822266, "learning_rate": 2.815996613370603e-06, "loss": 0.4451, "step": 10009 }, { "epoch": 2.533535813718046, "grad_norm": 0.1900489777326584, "learning_rate": 2.814331255880569e-06, "loss": 0.4383, "step": 10010 }, { "epoch": 2.533788914198937, "grad_norm": 0.19377893209457397, "learning_rate": 2.8126663103296794e-06, "loss": 0.4688, "step": 10011 }, { "epoch": 2.534042014679828, "grad_norm": 0.18253260850906372, "learning_rate": 2.8110017768133837e-06, "loss": 0.4537, "step": 10012 }, { "epoch": 2.534295115160719, "grad_norm": 0.1915486603975296, "learning_rate": 2.8093376554271022e-06, "loss": 0.4509, "step": 10013 }, { "epoch": 2.5345482156416095, "grad_norm": 0.18958939611911774, "learning_rate": 2.807673946266245e-06, "loss": 0.4434, "step": 10014 }, { "epoch": 2.5348013161225005, "grad_norm": 0.25285497307777405, "learning_rate": 2.806010649426183e-06, "loss": 0.463, "step": 10015 }, { "epoch": 2.5350544166033915, "grad_norm": 0.18748915195465088, "learning_rate": 2.804347765002272e-06, "loss": 0.4494, "step": 10016 }, { "epoch": 2.5353075170842825, "grad_norm": 0.20568689703941345, "learning_rate": 2.8026852930898386e-06, "loss": 0.4365, "step": 10017 }, { "epoch": 2.5355606175651735, "grad_norm": 0.19014915823936462, "learning_rate": 2.8010232337841893e-06, "loss": 0.4365, "step": 10018 }, { "epoch": 2.5358137180460645, "grad_norm": 0.18166807293891907, "learning_rate": 2.799361587180607e-06, "loss": 0.4469, "step": 10019 }, { "epoch": 2.536066818526955, "grad_norm": 0.18431977927684784, "learning_rate": 2.7977003533743486e-06, "loss": 0.4377, "step": 10020 }, { "epoch": 2.536319919007846, "grad_norm": 0.18902981281280518, "learning_rate": 2.796039532460656e-06, "loss": 0.4463, "step": 10021 }, { "epoch": 2.536573019488737, "grad_norm": 0.20162159204483032, "learning_rate": 2.794379124534733e-06, "loss": 0.4707, "step": 10022 }, { "epoch": 2.536826119969628, "grad_norm": 0.18429657816886902, "learning_rate": 2.79271912969177e-06, "loss": 0.4386, "step": 10023 }, { "epoch": 2.5370792204505186, "grad_norm": 0.19003990292549133, "learning_rate": 2.791059548026931e-06, "loss": 0.4314, "step": 10024 }, { "epoch": 2.5373323209314096, "grad_norm": 0.18645289540290833, "learning_rate": 2.789400379635356e-06, "loss": 0.4423, "step": 10025 }, { "epoch": 2.5375854214123006, "grad_norm": 0.18643803894519806, "learning_rate": 2.787741624612159e-06, "loss": 0.4398, "step": 10026 }, { "epoch": 2.5378385218931916, "grad_norm": 0.19030411541461945, "learning_rate": 2.786083283052442e-06, "loss": 0.4367, "step": 10027 }, { "epoch": 2.5380916223740826, "grad_norm": 0.1919931322336197, "learning_rate": 2.7844253550512647e-06, "loss": 0.4382, "step": 10028 }, { "epoch": 2.5383447228549736, "grad_norm": 0.1947229951620102, "learning_rate": 2.7827678407036752e-06, "loss": 0.4329, "step": 10029 }, { "epoch": 2.5385978233358646, "grad_norm": 0.1802547425031662, "learning_rate": 2.7811107401046966e-06, "loss": 0.4403, "step": 10030 }, { "epoch": 2.538850923816755, "grad_norm": 0.18786704540252686, "learning_rate": 2.7794540533493265e-06, "loss": 0.4402, "step": 10031 }, { "epoch": 2.539104024297646, "grad_norm": 0.18793120980262756, "learning_rate": 2.7777977805325384e-06, "loss": 0.4314, "step": 10032 }, { "epoch": 2.539357124778537, "grad_norm": 0.2848781645298004, "learning_rate": 2.7761419217492836e-06, "loss": 0.4645, "step": 10033 }, { "epoch": 2.539610225259428, "grad_norm": 0.19005922973155975, "learning_rate": 2.774486477094488e-06, "loss": 0.464, "step": 10034 }, { "epoch": 2.5398633257403187, "grad_norm": 0.1922098696231842, "learning_rate": 2.772831446663057e-06, "loss": 0.4632, "step": 10035 }, { "epoch": 2.5401164262212097, "grad_norm": 0.18187908828258514, "learning_rate": 2.771176830549868e-06, "loss": 0.4417, "step": 10036 }, { "epoch": 2.5403695267021007, "grad_norm": 0.18473802506923676, "learning_rate": 2.7695226288497767e-06, "loss": 0.4583, "step": 10037 }, { "epoch": 2.5406226271829917, "grad_norm": 0.18402975797653198, "learning_rate": 2.767868841657616e-06, "loss": 0.4651, "step": 10038 }, { "epoch": 2.5408757276638827, "grad_norm": 0.18638701736927032, "learning_rate": 2.766215469068194e-06, "loss": 0.4545, "step": 10039 }, { "epoch": 2.5411288281447737, "grad_norm": 0.19019222259521484, "learning_rate": 2.764562511176294e-06, "loss": 0.4418, "step": 10040 }, { "epoch": 2.5413819286256643, "grad_norm": 0.18565520644187927, "learning_rate": 2.762909968076677e-06, "loss": 0.4621, "step": 10041 }, { "epoch": 2.5416350291065553, "grad_norm": 0.18035127222537994, "learning_rate": 2.76125783986408e-06, "loss": 0.4691, "step": 10042 }, { "epoch": 2.5418881295874463, "grad_norm": 0.18847914040088654, "learning_rate": 2.7596061266332188e-06, "loss": 0.4527, "step": 10043 }, { "epoch": 2.5421412300683373, "grad_norm": 0.1853742003440857, "learning_rate": 2.7579548284787737e-06, "loss": 0.4291, "step": 10044 }, { "epoch": 2.542394330549228, "grad_norm": 0.19876785576343536, "learning_rate": 2.75630394549542e-06, "loss": 0.47, "step": 10045 }, { "epoch": 2.542647431030119, "grad_norm": 0.1874682754278183, "learning_rate": 2.7546534777777945e-06, "loss": 0.4567, "step": 10046 }, { "epoch": 2.54290053151101, "grad_norm": 0.18491841852664948, "learning_rate": 2.753003425420516e-06, "loss": 0.4599, "step": 10047 }, { "epoch": 2.543153631991901, "grad_norm": 0.18067201972007751, "learning_rate": 2.7513537885181797e-06, "loss": 0.4544, "step": 10048 }, { "epoch": 2.543406732472792, "grad_norm": 0.18405920267105103, "learning_rate": 2.7497045671653568e-06, "loss": 0.4543, "step": 10049 }, { "epoch": 2.543659832953683, "grad_norm": 0.20320245623588562, "learning_rate": 2.748055761456587e-06, "loss": 0.4707, "step": 10050 }, { "epoch": 2.5439129334345734, "grad_norm": 0.18633663654327393, "learning_rate": 2.7464073714863958e-06, "loss": 0.4315, "step": 10051 }, { "epoch": 2.5441660339154644, "grad_norm": 0.18798963725566864, "learning_rate": 2.744759397349286e-06, "loss": 0.4518, "step": 10052 }, { "epoch": 2.5444191343963554, "grad_norm": 0.18001310527324677, "learning_rate": 2.7431118391397303e-06, "loss": 0.4592, "step": 10053 }, { "epoch": 2.5446722348772464, "grad_norm": 0.1881578117609024, "learning_rate": 2.7414646969521787e-06, "loss": 0.4564, "step": 10054 }, { "epoch": 2.544925335358137, "grad_norm": 0.186203271150589, "learning_rate": 2.7398179708810623e-06, "loss": 0.4356, "step": 10055 }, { "epoch": 2.545178435839028, "grad_norm": 0.18575648963451385, "learning_rate": 2.7381716610207776e-06, "loss": 0.4586, "step": 10056 }, { "epoch": 2.545431536319919, "grad_norm": 0.18342582881450653, "learning_rate": 2.736525767465704e-06, "loss": 0.4569, "step": 10057 }, { "epoch": 2.54568463680081, "grad_norm": 0.18132220208644867, "learning_rate": 2.7348802903102044e-06, "loss": 0.437, "step": 10058 }, { "epoch": 2.545937737281701, "grad_norm": 0.19312989711761475, "learning_rate": 2.7332352296486063e-06, "loss": 0.4316, "step": 10059 }, { "epoch": 2.546190837762592, "grad_norm": 0.18035143613815308, "learning_rate": 2.731590585575221e-06, "loss": 0.4297, "step": 10060 }, { "epoch": 2.546443938243483, "grad_norm": 0.18124113976955414, "learning_rate": 2.729946358184327e-06, "loss": 0.4513, "step": 10061 }, { "epoch": 2.5466970387243735, "grad_norm": 0.17524045705795288, "learning_rate": 2.7283025475701864e-06, "loss": 0.4387, "step": 10062 }, { "epoch": 2.5469501392052645, "grad_norm": 0.18828803300857544, "learning_rate": 2.726659153827035e-06, "loss": 0.4769, "step": 10063 }, { "epoch": 2.5472032396861555, "grad_norm": 0.18840070068836212, "learning_rate": 2.7250161770490833e-06, "loss": 0.436, "step": 10064 }, { "epoch": 2.5474563401670465, "grad_norm": 0.1820862889289856, "learning_rate": 2.723373617330525e-06, "loss": 0.437, "step": 10065 }, { "epoch": 2.547709440647937, "grad_norm": 0.278234601020813, "learning_rate": 2.7217314747655253e-06, "loss": 0.4489, "step": 10066 }, { "epoch": 2.547962541128828, "grad_norm": 0.18185743689537048, "learning_rate": 2.7200897494482172e-06, "loss": 0.4268, "step": 10067 }, { "epoch": 2.548215641609719, "grad_norm": 0.18640218675136566, "learning_rate": 2.718448441472722e-06, "loss": 0.4562, "step": 10068 }, { "epoch": 2.54846874209061, "grad_norm": 0.19006755948066711, "learning_rate": 2.7168075509331314e-06, "loss": 0.4566, "step": 10069 }, { "epoch": 2.548721842571501, "grad_norm": 0.19564220309257507, "learning_rate": 2.715167077923515e-06, "loss": 0.4559, "step": 10070 }, { "epoch": 2.548974943052392, "grad_norm": 0.19050532579421997, "learning_rate": 2.7135270225379162e-06, "loss": 0.4536, "step": 10071 }, { "epoch": 2.5492280435332826, "grad_norm": 0.2105705738067627, "learning_rate": 2.7118873848703575e-06, "loss": 0.4711, "step": 10072 }, { "epoch": 2.5494811440141736, "grad_norm": 0.18885491788387299, "learning_rate": 2.710248165014835e-06, "loss": 0.4506, "step": 10073 }, { "epoch": 2.5497342444950646, "grad_norm": 0.1915927678346634, "learning_rate": 2.708609363065322e-06, "loss": 0.4649, "step": 10074 }, { "epoch": 2.5499873449759556, "grad_norm": 0.1949172466993332, "learning_rate": 2.7069709791157673e-06, "loss": 0.4426, "step": 10075 }, { "epoch": 2.550240445456846, "grad_norm": 0.1826252043247223, "learning_rate": 2.7053330132600963e-06, "loss": 0.4382, "step": 10076 }, { "epoch": 2.550493545937737, "grad_norm": 0.18847213685512543, "learning_rate": 2.7036954655922086e-06, "loss": 0.4686, "step": 10077 }, { "epoch": 2.550746646418628, "grad_norm": 0.1864190399646759, "learning_rate": 2.7020583362059836e-06, "loss": 0.4804, "step": 10078 }, { "epoch": 2.550999746899519, "grad_norm": 0.3970728814601898, "learning_rate": 2.700421625195274e-06, "loss": 0.4601, "step": 10079 }, { "epoch": 2.55125284738041, "grad_norm": 0.18455387651920319, "learning_rate": 2.6987853326539082e-06, "loss": 0.4466, "step": 10080 }, { "epoch": 2.551505947861301, "grad_norm": 0.19789916276931763, "learning_rate": 2.6971494586756918e-06, "loss": 0.5006, "step": 10081 }, { "epoch": 2.5517590483421917, "grad_norm": 0.19274453818798065, "learning_rate": 2.695514003354405e-06, "loss": 0.4732, "step": 10082 }, { "epoch": 2.5520121488230827, "grad_norm": 0.18300741910934448, "learning_rate": 2.693878966783806e-06, "loss": 0.4569, "step": 10083 }, { "epoch": 2.5522652493039737, "grad_norm": 0.1867552250623703, "learning_rate": 2.692244349057628e-06, "loss": 0.4631, "step": 10084 }, { "epoch": 2.5525183497848647, "grad_norm": 0.18253293633460999, "learning_rate": 2.6906101502695804e-06, "loss": 0.4618, "step": 10085 }, { "epoch": 2.5527714502657552, "grad_norm": 0.18756355345249176, "learning_rate": 2.6889763705133478e-06, "loss": 0.4355, "step": 10086 }, { "epoch": 2.5530245507466462, "grad_norm": 0.18216581642627716, "learning_rate": 2.6873430098825903e-06, "loss": 0.425, "step": 10087 }, { "epoch": 2.5532776512275372, "grad_norm": 0.18047209084033966, "learning_rate": 2.685710068470947e-06, "loss": 0.4585, "step": 10088 }, { "epoch": 2.5535307517084282, "grad_norm": 0.18239182233810425, "learning_rate": 2.6840775463720303e-06, "loss": 0.4461, "step": 10089 }, { "epoch": 2.5537838521893192, "grad_norm": 0.18687807023525238, "learning_rate": 2.6824454436794277e-06, "loss": 0.4559, "step": 10090 }, { "epoch": 2.5540369526702102, "grad_norm": 0.18842557072639465, "learning_rate": 2.680813760486707e-06, "loss": 0.4554, "step": 10091 }, { "epoch": 2.5542900531511012, "grad_norm": 0.18071787059307098, "learning_rate": 2.679182496887407e-06, "loss": 0.4246, "step": 10092 }, { "epoch": 2.554543153631992, "grad_norm": 0.18840673565864563, "learning_rate": 2.677551652975049e-06, "loss": 0.4638, "step": 10093 }, { "epoch": 2.554796254112883, "grad_norm": 0.17790691554546356, "learning_rate": 2.675921228843118e-06, "loss": 0.4201, "step": 10094 }, { "epoch": 2.555049354593774, "grad_norm": 0.18585360050201416, "learning_rate": 2.6742912245850837e-06, "loss": 0.4778, "step": 10095 }, { "epoch": 2.555302455074665, "grad_norm": 0.18912088871002197, "learning_rate": 2.6726616402943973e-06, "loss": 0.4483, "step": 10096 }, { "epoch": 2.5555555555555554, "grad_norm": 0.1835925430059433, "learning_rate": 2.671032476064477e-06, "loss": 0.4636, "step": 10097 }, { "epoch": 2.5558086560364464, "grad_norm": 0.18669888377189636, "learning_rate": 2.669403731988717e-06, "loss": 0.4497, "step": 10098 }, { "epoch": 2.5560617565173374, "grad_norm": 0.1843961924314499, "learning_rate": 2.6677754081604944e-06, "loss": 0.4698, "step": 10099 }, { "epoch": 2.5563148569982284, "grad_norm": 0.18573735654354095, "learning_rate": 2.666147504673151e-06, "loss": 0.4514, "step": 10100 }, { "epoch": 2.5565679574791194, "grad_norm": 0.18741099536418915, "learning_rate": 2.664520021620014e-06, "loss": 0.467, "step": 10101 }, { "epoch": 2.5568210579600104, "grad_norm": 0.1797780692577362, "learning_rate": 2.662892959094382e-06, "loss": 0.439, "step": 10102 }, { "epoch": 2.557074158440901, "grad_norm": 0.1885903775691986, "learning_rate": 2.6612663171895346e-06, "loss": 0.4572, "step": 10103 }, { "epoch": 2.557327258921792, "grad_norm": 0.19496095180511475, "learning_rate": 2.6596400959987257e-06, "loss": 0.4527, "step": 10104 }, { "epoch": 2.557580359402683, "grad_norm": 0.1792518049478531, "learning_rate": 2.658014295615177e-06, "loss": 0.4113, "step": 10105 }, { "epoch": 2.557833459883574, "grad_norm": 0.18085502088069916, "learning_rate": 2.6563889161320932e-06, "loss": 0.4413, "step": 10106 }, { "epoch": 2.5580865603644645, "grad_norm": 0.1817583590745926, "learning_rate": 2.6547639576426566e-06, "loss": 0.4631, "step": 10107 }, { "epoch": 2.5583396608453555, "grad_norm": 0.18332654237747192, "learning_rate": 2.6531394202400173e-06, "loss": 0.4574, "step": 10108 }, { "epoch": 2.5585927613262465, "grad_norm": 0.1849878579378128, "learning_rate": 2.6515153040173138e-06, "loss": 0.4407, "step": 10109 }, { "epoch": 2.5588458618071375, "grad_norm": 0.18603429198265076, "learning_rate": 2.649891609067653e-06, "loss": 0.4807, "step": 10110 }, { "epoch": 2.5590989622880285, "grad_norm": 0.18970263004302979, "learning_rate": 2.648268335484112e-06, "loss": 0.425, "step": 10111 }, { "epoch": 2.5593520627689195, "grad_norm": 0.18484920263290405, "learning_rate": 2.6466454833597532e-06, "loss": 0.4362, "step": 10112 }, { "epoch": 2.55960516324981, "grad_norm": 0.18412871658802032, "learning_rate": 2.6450230527876096e-06, "loss": 0.4361, "step": 10113 }, { "epoch": 2.559858263730701, "grad_norm": 0.18566758930683136, "learning_rate": 2.6434010438606927e-06, "loss": 0.4405, "step": 10114 }, { "epoch": 2.560111364211592, "grad_norm": 0.1870509833097458, "learning_rate": 2.641779456671989e-06, "loss": 0.4443, "step": 10115 }, { "epoch": 2.560364464692483, "grad_norm": 0.18694978952407837, "learning_rate": 2.6401582913144607e-06, "loss": 0.3982, "step": 10116 }, { "epoch": 2.5606175651733736, "grad_norm": 0.18050754070281982, "learning_rate": 2.638537547881046e-06, "loss": 0.4311, "step": 10117 }, { "epoch": 2.5608706656542646, "grad_norm": 0.18526650965213776, "learning_rate": 2.636917226464657e-06, "loss": 0.4622, "step": 10118 }, { "epoch": 2.5611237661351556, "grad_norm": 0.18502788245677948, "learning_rate": 2.635297327158185e-06, "loss": 0.4548, "step": 10119 }, { "epoch": 2.5613768666160466, "grad_norm": 0.1911090910434723, "learning_rate": 2.6336778500544958e-06, "loss": 0.4514, "step": 10120 }, { "epoch": 2.5616299670969376, "grad_norm": 0.185916930437088, "learning_rate": 2.6320587952464285e-06, "loss": 0.4706, "step": 10121 }, { "epoch": 2.5618830675778286, "grad_norm": 0.19583147764205933, "learning_rate": 2.6304401628268016e-06, "loss": 0.4147, "step": 10122 }, { "epoch": 2.5621361680587196, "grad_norm": 0.18534432351589203, "learning_rate": 2.6288219528884073e-06, "loss": 0.4489, "step": 10123 }, { "epoch": 2.56238926853961, "grad_norm": 0.18600061535835266, "learning_rate": 2.6272041655240154e-06, "loss": 0.4672, "step": 10124 }, { "epoch": 2.562642369020501, "grad_norm": 0.19017387926578522, "learning_rate": 2.6255868008263686e-06, "loss": 0.4637, "step": 10125 }, { "epoch": 2.562895469501392, "grad_norm": 0.1875229924917221, "learning_rate": 2.623969858888187e-06, "loss": 0.4681, "step": 10126 }, { "epoch": 2.563148569982283, "grad_norm": 0.18585650622844696, "learning_rate": 2.6223533398021685e-06, "loss": 0.4491, "step": 10127 }, { "epoch": 2.5634016704631737, "grad_norm": 0.1839316189289093, "learning_rate": 2.620737243660981e-06, "loss": 0.4723, "step": 10128 }, { "epoch": 2.5636547709440647, "grad_norm": 0.19064678251743317, "learning_rate": 2.619121570557276e-06, "loss": 0.4524, "step": 10129 }, { "epoch": 2.5639078714249557, "grad_norm": 0.18491435050964355, "learning_rate": 2.617506320583674e-06, "loss": 0.4506, "step": 10130 }, { "epoch": 2.5641609719058467, "grad_norm": 0.4554470479488373, "learning_rate": 2.615891493832775e-06, "loss": 0.4634, "step": 10131 }, { "epoch": 2.5644140723867377, "grad_norm": 0.18798719346523285, "learning_rate": 2.6142770903971558e-06, "loss": 0.4508, "step": 10132 }, { "epoch": 2.5646671728676287, "grad_norm": 0.18607255816459656, "learning_rate": 2.612663110369358e-06, "loss": 0.4588, "step": 10133 }, { "epoch": 2.5649202733485192, "grad_norm": 0.18356862664222717, "learning_rate": 2.6110495538419166e-06, "loss": 0.4356, "step": 10134 }, { "epoch": 2.5651733738294102, "grad_norm": 0.18155445158481598, "learning_rate": 2.609436420907331e-06, "loss": 0.4609, "step": 10135 }, { "epoch": 2.5654264743103012, "grad_norm": 0.18776844441890717, "learning_rate": 2.6078237116580785e-06, "loss": 0.4721, "step": 10136 }, { "epoch": 2.5656795747911922, "grad_norm": 0.18880058825016022, "learning_rate": 2.606211426186611e-06, "loss": 0.4501, "step": 10137 }, { "epoch": 2.565932675272083, "grad_norm": 0.18476901948451996, "learning_rate": 2.6045995645853626e-06, "loss": 0.4466, "step": 10138 }, { "epoch": 2.566185775752974, "grad_norm": 0.1847609132528305, "learning_rate": 2.6029881269467273e-06, "loss": 0.4786, "step": 10139 }, { "epoch": 2.566438876233865, "grad_norm": 0.1793556958436966, "learning_rate": 2.601377113363094e-06, "loss": 0.4321, "step": 10140 }, { "epoch": 2.566691976714756, "grad_norm": 0.19241204857826233, "learning_rate": 2.599766523926818e-06, "loss": 0.4475, "step": 10141 }, { "epoch": 2.566945077195647, "grad_norm": 0.1856948733329773, "learning_rate": 2.598156358730228e-06, "loss": 0.4451, "step": 10142 }, { "epoch": 2.567198177676538, "grad_norm": 0.1826203316450119, "learning_rate": 2.5965466178656364e-06, "loss": 0.4307, "step": 10143 }, { "epoch": 2.5674512781574284, "grad_norm": 0.18450266122817993, "learning_rate": 2.59493730142532e-06, "loss": 0.4439, "step": 10144 }, { "epoch": 2.5677043786383194, "grad_norm": 0.17987783253192902, "learning_rate": 2.59332840950154e-06, "loss": 0.4417, "step": 10145 }, { "epoch": 2.5679574791192104, "grad_norm": 0.1984582543373108, "learning_rate": 2.591719942186527e-06, "loss": 0.4854, "step": 10146 }, { "epoch": 2.5682105796001014, "grad_norm": 0.20151907205581665, "learning_rate": 2.5901118995724995e-06, "loss": 0.4337, "step": 10147 }, { "epoch": 2.568463680080992, "grad_norm": 0.19237062335014343, "learning_rate": 2.588504281751638e-06, "loss": 0.4438, "step": 10148 }, { "epoch": 2.568716780561883, "grad_norm": 0.18836940824985504, "learning_rate": 2.586897088816107e-06, "loss": 0.4689, "step": 10149 }, { "epoch": 2.568969881042774, "grad_norm": 0.1924908608198166, "learning_rate": 2.585290320858038e-06, "loss": 0.4271, "step": 10150 }, { "epoch": 2.569222981523665, "grad_norm": 0.1841449737548828, "learning_rate": 2.5836839779695467e-06, "loss": 0.4487, "step": 10151 }, { "epoch": 2.569476082004556, "grad_norm": 0.19245260953903198, "learning_rate": 2.582078060242719e-06, "loss": 0.4583, "step": 10152 }, { "epoch": 2.569729182485447, "grad_norm": 0.18975123763084412, "learning_rate": 2.580472567769623e-06, "loss": 0.4456, "step": 10153 }, { "epoch": 2.569982282966338, "grad_norm": 0.18958984315395355, "learning_rate": 2.578867500642298e-06, "loss": 0.4368, "step": 10154 }, { "epoch": 2.5702353834472285, "grad_norm": 0.17950858175754547, "learning_rate": 2.577262858952756e-06, "loss": 0.4516, "step": 10155 }, { "epoch": 2.5704884839281195, "grad_norm": 0.18405859172344208, "learning_rate": 2.5756586427929887e-06, "loss": 0.4371, "step": 10156 }, { "epoch": 2.5707415844090105, "grad_norm": 0.18491853773593903, "learning_rate": 2.5740548522549613e-06, "loss": 0.4619, "step": 10157 }, { "epoch": 2.570994684889901, "grad_norm": 0.19623742997646332, "learning_rate": 2.5724514874306182e-06, "loss": 0.4442, "step": 10158 }, { "epoch": 2.571247785370792, "grad_norm": 0.19752688705921173, "learning_rate": 2.570848548411872e-06, "loss": 0.4685, "step": 10159 }, { "epoch": 2.571500885851683, "grad_norm": 0.1827579140663147, "learning_rate": 2.5692460352906256e-06, "loss": 0.4726, "step": 10160 }, { "epoch": 2.571753986332574, "grad_norm": 0.18831638991832733, "learning_rate": 2.567643948158739e-06, "loss": 0.4565, "step": 10161 }, { "epoch": 2.572007086813465, "grad_norm": 0.18677255511283875, "learning_rate": 2.566042287108058e-06, "loss": 0.4372, "step": 10162 }, { "epoch": 2.572260187294356, "grad_norm": 0.185468852519989, "learning_rate": 2.564441052230404e-06, "loss": 0.4699, "step": 10163 }, { "epoch": 2.572513287775247, "grad_norm": 0.20024485886096954, "learning_rate": 2.5628402436175724e-06, "loss": 0.4532, "step": 10164 }, { "epoch": 2.5727663882561376, "grad_norm": 0.19175027310848236, "learning_rate": 2.561239861361333e-06, "loss": 0.4439, "step": 10165 }, { "epoch": 2.5730194887370286, "grad_norm": 0.19629962742328644, "learning_rate": 2.559639905553434e-06, "loss": 0.4417, "step": 10166 }, { "epoch": 2.5732725892179196, "grad_norm": 0.17980588972568512, "learning_rate": 2.5580403762855944e-06, "loss": 0.4223, "step": 10167 }, { "epoch": 2.5735256896988106, "grad_norm": 0.20070575177669525, "learning_rate": 2.556441273649515e-06, "loss": 0.4612, "step": 10168 }, { "epoch": 2.573778790179701, "grad_norm": 0.18205375969409943, "learning_rate": 2.5548425977368686e-06, "loss": 0.44, "step": 10169 }, { "epoch": 2.574031890660592, "grad_norm": 0.1913781762123108, "learning_rate": 2.553244348639301e-06, "loss": 0.4527, "step": 10170 }, { "epoch": 2.574284991141483, "grad_norm": 0.1913093626499176, "learning_rate": 2.5516465264484403e-06, "loss": 0.4536, "step": 10171 }, { "epoch": 2.574538091622374, "grad_norm": 0.17946840822696686, "learning_rate": 2.5500491312558837e-06, "loss": 0.4301, "step": 10172 }, { "epoch": 2.574791192103265, "grad_norm": 0.18348723649978638, "learning_rate": 2.5484521631532076e-06, "loss": 0.4385, "step": 10173 }, { "epoch": 2.575044292584156, "grad_norm": 0.2256496250629425, "learning_rate": 2.546855622231962e-06, "loss": 0.4405, "step": 10174 }, { "epoch": 2.5752973930650467, "grad_norm": 0.18659470975399017, "learning_rate": 2.545259508583674e-06, "loss": 0.4608, "step": 10175 }, { "epoch": 2.5755504935459377, "grad_norm": 0.1822109967470169, "learning_rate": 2.5436638222998477e-06, "loss": 0.4462, "step": 10176 }, { "epoch": 2.5758035940268287, "grad_norm": 0.19241562485694885, "learning_rate": 2.542068563471952e-06, "loss": 0.4738, "step": 10177 }, { "epoch": 2.5760566945077197, "grad_norm": 0.18741746246814728, "learning_rate": 2.5404737321914486e-06, "loss": 0.4558, "step": 10178 }, { "epoch": 2.5763097949886102, "grad_norm": 0.18294546008110046, "learning_rate": 2.5388793285497624e-06, "loss": 0.4554, "step": 10179 }, { "epoch": 2.5765628954695012, "grad_norm": 0.18945392966270447, "learning_rate": 2.537285352638298e-06, "loss": 0.4639, "step": 10180 }, { "epoch": 2.5768159959503922, "grad_norm": 0.18698178231716156, "learning_rate": 2.5356918045484336e-06, "loss": 0.4583, "step": 10181 }, { "epoch": 2.5770690964312832, "grad_norm": 0.18990486860275269, "learning_rate": 2.5340986843715276e-06, "loss": 0.473, "step": 10182 }, { "epoch": 2.5773221969121742, "grad_norm": 0.19287759065628052, "learning_rate": 2.5325059921989016e-06, "loss": 0.4452, "step": 10183 }, { "epoch": 2.5775752973930652, "grad_norm": 0.18637104332447052, "learning_rate": 2.5309137281218686e-06, "loss": 0.4446, "step": 10184 }, { "epoch": 2.5778283978739562, "grad_norm": 0.18584312498569489, "learning_rate": 2.5293218922317087e-06, "loss": 0.4662, "step": 10185 }, { "epoch": 2.578081498354847, "grad_norm": 0.18657441437244415, "learning_rate": 2.527730484619677e-06, "loss": 0.4487, "step": 10186 }, { "epoch": 2.578334598835738, "grad_norm": 0.18056319653987885, "learning_rate": 2.5261395053770066e-06, "loss": 0.4407, "step": 10187 }, { "epoch": 2.578587699316629, "grad_norm": 0.186221644282341, "learning_rate": 2.5245489545949064e-06, "loss": 0.4489, "step": 10188 }, { "epoch": 2.5788407997975193, "grad_norm": 0.1940188854932785, "learning_rate": 2.522958832364555e-06, "loss": 0.452, "step": 10189 }, { "epoch": 2.5790939002784103, "grad_norm": 0.1890958845615387, "learning_rate": 2.521369138777111e-06, "loss": 0.4611, "step": 10190 }, { "epoch": 2.5793470007593013, "grad_norm": 0.18734003603458405, "learning_rate": 2.5197798739237124e-06, "loss": 0.4421, "step": 10191 }, { "epoch": 2.5796001012401923, "grad_norm": 0.19041982293128967, "learning_rate": 2.5181910378954666e-06, "loss": 0.4954, "step": 10192 }, { "epoch": 2.5798532017210833, "grad_norm": 0.19090664386749268, "learning_rate": 2.51660263078346e-06, "loss": 0.4525, "step": 10193 }, { "epoch": 2.5801063022019743, "grad_norm": 0.21303316950798035, "learning_rate": 2.515014652678749e-06, "loss": 0.4612, "step": 10194 }, { "epoch": 2.5803594026828653, "grad_norm": 0.1809835582971573, "learning_rate": 2.513427103672369e-06, "loss": 0.4295, "step": 10195 }, { "epoch": 2.580612503163756, "grad_norm": 0.1857752501964569, "learning_rate": 2.5118399838553298e-06, "loss": 0.4624, "step": 10196 }, { "epoch": 2.580865603644647, "grad_norm": 0.1825648546218872, "learning_rate": 2.510253293318623e-06, "loss": 0.4485, "step": 10197 }, { "epoch": 2.581118704125538, "grad_norm": 0.18681029975414276, "learning_rate": 2.508667032153208e-06, "loss": 0.4447, "step": 10198 }, { "epoch": 2.581371804606429, "grad_norm": 0.19173632562160492, "learning_rate": 2.507081200450023e-06, "loss": 0.452, "step": 10199 }, { "epoch": 2.5816249050873195, "grad_norm": 0.18802933394908905, "learning_rate": 2.5054957982999773e-06, "loss": 0.4262, "step": 10200 }, { "epoch": 2.5818780055682105, "grad_norm": 0.19624102115631104, "learning_rate": 2.5039108257939583e-06, "loss": 0.4413, "step": 10201 }, { "epoch": 2.5821311060491015, "grad_norm": 0.18731126189231873, "learning_rate": 2.5023262830228312e-06, "loss": 0.4662, "step": 10202 }, { "epoch": 2.5823842065299925, "grad_norm": 0.18885117769241333, "learning_rate": 2.50074217007743e-06, "loss": 0.4263, "step": 10203 }, { "epoch": 2.5826373070108835, "grad_norm": 0.19471941888332367, "learning_rate": 2.4991584870485795e-06, "loss": 0.4355, "step": 10204 }, { "epoch": 2.5828904074917745, "grad_norm": 0.20679761469364166, "learning_rate": 2.4975752340270587e-06, "loss": 0.4424, "step": 10205 }, { "epoch": 2.583143507972665, "grad_norm": 0.19463349878787994, "learning_rate": 2.495992411103635e-06, "loss": 0.4647, "step": 10206 }, { "epoch": 2.583396608453556, "grad_norm": 0.24246488511562347, "learning_rate": 2.4944100183690488e-06, "loss": 0.4482, "step": 10207 }, { "epoch": 2.583649708934447, "grad_norm": 0.183250293135643, "learning_rate": 2.492828055914015e-06, "loss": 0.4407, "step": 10208 }, { "epoch": 2.583902809415338, "grad_norm": 0.1852773129940033, "learning_rate": 2.491246523829224e-06, "loss": 0.4471, "step": 10209 }, { "epoch": 2.5841559098962286, "grad_norm": 0.18648658692836761, "learning_rate": 2.4896654222053417e-06, "loss": 0.4585, "step": 10210 }, { "epoch": 2.5844090103771196, "grad_norm": 0.1930375099182129, "learning_rate": 2.48808475113301e-06, "loss": 0.4598, "step": 10211 }, { "epoch": 2.5846621108580106, "grad_norm": 0.18510961532592773, "learning_rate": 2.486504510702844e-06, "loss": 0.4675, "step": 10212 }, { "epoch": 2.5849152113389016, "grad_norm": 0.19715119898319244, "learning_rate": 2.484924701005438e-06, "loss": 0.4613, "step": 10213 }, { "epoch": 2.5851683118197926, "grad_norm": 0.1873014271259308, "learning_rate": 2.4833453221313565e-06, "loss": 0.4686, "step": 10214 }, { "epoch": 2.5854214123006836, "grad_norm": 0.18468955159187317, "learning_rate": 2.481766374171143e-06, "loss": 0.4487, "step": 10215 }, { "epoch": 2.585674512781574, "grad_norm": 0.18266542255878448, "learning_rate": 2.480187857215316e-06, "loss": 0.4626, "step": 10216 }, { "epoch": 2.585927613262465, "grad_norm": 0.18308158218860626, "learning_rate": 2.4786097713543667e-06, "loss": 0.4623, "step": 10217 }, { "epoch": 2.586180713743356, "grad_norm": 0.18307586014270782, "learning_rate": 2.477032116678766e-06, "loss": 0.4601, "step": 10218 }, { "epoch": 2.586433814224247, "grad_norm": 0.18349070847034454, "learning_rate": 2.4754548932789546e-06, "loss": 0.446, "step": 10219 }, { "epoch": 2.5866869147051377, "grad_norm": 0.181152805685997, "learning_rate": 2.4738781012453526e-06, "loss": 0.4598, "step": 10220 }, { "epoch": 2.5869400151860287, "grad_norm": 0.18771490454673767, "learning_rate": 2.472301740668356e-06, "loss": 0.4492, "step": 10221 }, { "epoch": 2.5871931156669197, "grad_norm": 0.1844528615474701, "learning_rate": 2.4707258116383313e-06, "loss": 0.4604, "step": 10222 }, { "epoch": 2.5874462161478107, "grad_norm": 0.1868470013141632, "learning_rate": 2.4691503142456243e-06, "loss": 0.4546, "step": 10223 }, { "epoch": 2.5876993166287017, "grad_norm": 0.2044277936220169, "learning_rate": 2.467575248580556e-06, "loss": 0.4317, "step": 10224 }, { "epoch": 2.5879524171095927, "grad_norm": 0.18463145196437836, "learning_rate": 2.4660006147334192e-06, "loss": 0.4614, "step": 10225 }, { "epoch": 2.5882055175904837, "grad_norm": 0.18917405605316162, "learning_rate": 2.4644264127944895e-06, "loss": 0.4555, "step": 10226 }, { "epoch": 2.5884586180713742, "grad_norm": 0.19334647059440613, "learning_rate": 2.4628526428540033e-06, "loss": 0.4314, "step": 10227 }, { "epoch": 2.5887117185522652, "grad_norm": 0.18750028312206268, "learning_rate": 2.461279305002189e-06, "loss": 0.461, "step": 10228 }, { "epoch": 2.5889648190331562, "grad_norm": 0.19011545181274414, "learning_rate": 2.4597063993292412e-06, "loss": 0.4398, "step": 10229 }, { "epoch": 2.5892179195140472, "grad_norm": 0.18580175936222076, "learning_rate": 2.4581339259253313e-06, "loss": 0.4465, "step": 10230 }, { "epoch": 2.589471019994938, "grad_norm": 0.18729275465011597, "learning_rate": 2.456561884880604e-06, "loss": 0.4459, "step": 10231 }, { "epoch": 2.589724120475829, "grad_norm": 0.1912175565958023, "learning_rate": 2.4549902762851863e-06, "loss": 0.4505, "step": 10232 }, { "epoch": 2.58997722095672, "grad_norm": 0.1858912855386734, "learning_rate": 2.4534191002291672e-06, "loss": 0.4258, "step": 10233 }, { "epoch": 2.590230321437611, "grad_norm": 0.18441897630691528, "learning_rate": 2.451848356802621e-06, "loss": 0.4495, "step": 10234 }, { "epoch": 2.590483421918502, "grad_norm": 0.2100992500782013, "learning_rate": 2.4502780460956e-06, "loss": 0.4623, "step": 10235 }, { "epoch": 2.590736522399393, "grad_norm": 0.2097424566745758, "learning_rate": 2.448708168198124e-06, "loss": 0.4591, "step": 10236 }, { "epoch": 2.5909896228802833, "grad_norm": 0.1878472864627838, "learning_rate": 2.447138723200193e-06, "loss": 0.4537, "step": 10237 }, { "epoch": 2.5912427233611743, "grad_norm": 0.18418240547180176, "learning_rate": 2.445569711191774e-06, "loss": 0.428, "step": 10238 }, { "epoch": 2.5914958238420653, "grad_norm": 0.18393456935882568, "learning_rate": 2.44400113226282e-06, "loss": 0.4395, "step": 10239 }, { "epoch": 2.5917489243229563, "grad_norm": 0.18434078991413116, "learning_rate": 2.4424329865032525e-06, "loss": 0.4328, "step": 10240 }, { "epoch": 2.592002024803847, "grad_norm": 0.1857757419347763, "learning_rate": 2.440865274002967e-06, "loss": 0.4696, "step": 10241 }, { "epoch": 2.592255125284738, "grad_norm": 0.18855507671833038, "learning_rate": 2.4392979948518435e-06, "loss": 0.4632, "step": 10242 }, { "epoch": 2.592508225765629, "grad_norm": 0.18644340336322784, "learning_rate": 2.4377311491397314e-06, "loss": 0.4529, "step": 10243 }, { "epoch": 2.59276132624652, "grad_norm": 0.19024065136909485, "learning_rate": 2.4361647369564476e-06, "loss": 0.4705, "step": 10244 }, { "epoch": 2.593014426727411, "grad_norm": 0.1851094365119934, "learning_rate": 2.434598758391795e-06, "loss": 0.4526, "step": 10245 }, { "epoch": 2.593267527208302, "grad_norm": 0.18560907244682312, "learning_rate": 2.4330332135355484e-06, "loss": 0.4531, "step": 10246 }, { "epoch": 2.5935206276891924, "grad_norm": 0.19676028192043304, "learning_rate": 2.431468102477452e-06, "loss": 0.4485, "step": 10247 }, { "epoch": 2.5937737281700834, "grad_norm": 0.1800585389137268, "learning_rate": 2.4299034253072386e-06, "loss": 0.4162, "step": 10248 }, { "epoch": 2.5940268286509744, "grad_norm": 0.18243257701396942, "learning_rate": 2.4283391821146063e-06, "loss": 0.4294, "step": 10249 }, { "epoch": 2.5942799291318654, "grad_norm": 0.1874336153268814, "learning_rate": 2.426775372989224e-06, "loss": 0.4559, "step": 10250 }, { "epoch": 2.594533029612756, "grad_norm": 0.18604902923107147, "learning_rate": 2.4252119980207457e-06, "loss": 0.4334, "step": 10251 }, { "epoch": 2.594786130093647, "grad_norm": 0.18521910905838013, "learning_rate": 2.4236490572987946e-06, "loss": 0.448, "step": 10252 }, { "epoch": 2.595039230574538, "grad_norm": 0.19534330070018768, "learning_rate": 2.422086550912972e-06, "loss": 0.4579, "step": 10253 }, { "epoch": 2.595292331055429, "grad_norm": 0.1866769790649414, "learning_rate": 2.4205244789528536e-06, "loss": 0.4663, "step": 10254 }, { "epoch": 2.59554543153632, "grad_norm": 0.19304849207401276, "learning_rate": 2.418962841507988e-06, "loss": 0.4513, "step": 10255 }, { "epoch": 2.595798532017211, "grad_norm": 0.18085810542106628, "learning_rate": 2.417401638667902e-06, "loss": 0.4488, "step": 10256 }, { "epoch": 2.596051632498102, "grad_norm": 0.1890995055437088, "learning_rate": 2.415840870522096e-06, "loss": 0.4554, "step": 10257 }, { "epoch": 2.5963047329789926, "grad_norm": 0.1876252293586731, "learning_rate": 2.4142805371600443e-06, "loss": 0.4433, "step": 10258 }, { "epoch": 2.5965578334598836, "grad_norm": 0.18646793067455292, "learning_rate": 2.412720638671199e-06, "loss": 0.4834, "step": 10259 }, { "epoch": 2.5968109339407746, "grad_norm": 0.1823262870311737, "learning_rate": 2.411161175144984e-06, "loss": 0.4448, "step": 10260 }, { "epoch": 2.5970640344216656, "grad_norm": 0.19194599986076355, "learning_rate": 2.4096021466708007e-06, "loss": 0.4358, "step": 10261 }, { "epoch": 2.597317134902556, "grad_norm": 0.18447931110858917, "learning_rate": 2.4080435533380263e-06, "loss": 0.4409, "step": 10262 }, { "epoch": 2.597570235383447, "grad_norm": 0.18779496848583221, "learning_rate": 2.4064853952360103e-06, "loss": 0.4477, "step": 10263 }, { "epoch": 2.597823335864338, "grad_norm": 0.18418321013450623, "learning_rate": 2.404927672454079e-06, "loss": 0.4326, "step": 10264 }, { "epoch": 2.598076436345229, "grad_norm": 0.1860562413930893, "learning_rate": 2.403370385081534e-06, "loss": 0.4582, "step": 10265 }, { "epoch": 2.59832953682612, "grad_norm": 0.1858510822057724, "learning_rate": 2.4018135332076487e-06, "loss": 0.4426, "step": 10266 }, { "epoch": 2.598582637307011, "grad_norm": 0.18641319870948792, "learning_rate": 2.400257116921677e-06, "loss": 0.4666, "step": 10267 }, { "epoch": 2.5988357377879017, "grad_norm": 0.18934980034828186, "learning_rate": 2.3987011363128443e-06, "loss": 0.4512, "step": 10268 }, { "epoch": 2.5990888382687927, "grad_norm": 0.22863009572029114, "learning_rate": 2.3971455914703512e-06, "loss": 0.4529, "step": 10269 }, { "epoch": 2.5993419387496837, "grad_norm": 0.18454785645008087, "learning_rate": 2.3955904824833733e-06, "loss": 0.4954, "step": 10270 }, { "epoch": 2.5995950392305747, "grad_norm": 0.18083487451076508, "learning_rate": 2.3940358094410654e-06, "loss": 0.4298, "step": 10271 }, { "epoch": 2.5998481397114652, "grad_norm": 0.18010786175727844, "learning_rate": 2.3924815724325446e-06, "loss": 0.4419, "step": 10272 }, { "epoch": 2.6001012401923562, "grad_norm": 0.19724437594413757, "learning_rate": 2.39092777154692e-06, "loss": 0.4445, "step": 10273 }, { "epoch": 2.6003543406732472, "grad_norm": 0.1889108270406723, "learning_rate": 2.389374406873266e-06, "loss": 0.446, "step": 10274 }, { "epoch": 2.600607441154138, "grad_norm": 0.1869836002588272, "learning_rate": 2.3878214785006338e-06, "loss": 0.4813, "step": 10275 }, { "epoch": 2.600860541635029, "grad_norm": 0.18217402696609497, "learning_rate": 2.386268986518051e-06, "loss": 0.4317, "step": 10276 }, { "epoch": 2.60111364211592, "grad_norm": 0.1975875198841095, "learning_rate": 2.3847169310145147e-06, "loss": 0.454, "step": 10277 }, { "epoch": 2.6013667425968108, "grad_norm": 0.18812024593353271, "learning_rate": 2.383165312079e-06, "loss": 0.4585, "step": 10278 }, { "epoch": 2.6016198430777018, "grad_norm": 0.19019748270511627, "learning_rate": 2.3816141298004623e-06, "loss": 0.448, "step": 10279 }, { "epoch": 2.6018729435585928, "grad_norm": 0.19014866650104523, "learning_rate": 2.3800633842678276e-06, "loss": 0.4616, "step": 10280 }, { "epoch": 2.6021260440394838, "grad_norm": 0.18790490925312042, "learning_rate": 2.3785130755699946e-06, "loss": 0.4657, "step": 10281 }, { "epoch": 2.6023791445203743, "grad_norm": 0.18058517575263977, "learning_rate": 2.3769632037958432e-06, "loss": 0.4504, "step": 10282 }, { "epoch": 2.6026322450012653, "grad_norm": 0.4306124746799469, "learning_rate": 2.3754137690342185e-06, "loss": 0.4666, "step": 10283 }, { "epoch": 2.6028853454821563, "grad_norm": 0.1831706166267395, "learning_rate": 2.373864771373948e-06, "loss": 0.4796, "step": 10284 }, { "epoch": 2.6031384459630473, "grad_norm": 0.18854139745235443, "learning_rate": 2.37231621090383e-06, "loss": 0.447, "step": 10285 }, { "epoch": 2.6033915464439383, "grad_norm": 0.18985867500305176, "learning_rate": 2.370768087712647e-06, "loss": 0.4961, "step": 10286 }, { "epoch": 2.6036446469248293, "grad_norm": 0.1924763321876526, "learning_rate": 2.3692204018891494e-06, "loss": 0.4719, "step": 10287 }, { "epoch": 2.6038977474057203, "grad_norm": 0.19279856979846954, "learning_rate": 2.3676731535220566e-06, "loss": 0.4589, "step": 10288 }, { "epoch": 2.604150847886611, "grad_norm": 0.19023731350898743, "learning_rate": 2.366126342700071e-06, "loss": 0.4664, "step": 10289 }, { "epoch": 2.604403948367502, "grad_norm": 0.18551769852638245, "learning_rate": 2.3645799695118686e-06, "loss": 0.447, "step": 10290 }, { "epoch": 2.604657048848393, "grad_norm": 0.18078958988189697, "learning_rate": 2.363034034046099e-06, "loss": 0.4663, "step": 10291 }, { "epoch": 2.604910149329284, "grad_norm": 0.19135132431983948, "learning_rate": 2.36148853639139e-06, "loss": 0.4523, "step": 10292 }, { "epoch": 2.6051632498101744, "grad_norm": 0.19096121191978455, "learning_rate": 2.3599434766363426e-06, "loss": 0.4578, "step": 10293 }, { "epoch": 2.6054163502910654, "grad_norm": 0.18892499804496765, "learning_rate": 2.3583988548695268e-06, "loss": 0.4252, "step": 10294 }, { "epoch": 2.6056694507719564, "grad_norm": 0.18400703370571136, "learning_rate": 2.356854671179495e-06, "loss": 0.4614, "step": 10295 }, { "epoch": 2.6059225512528474, "grad_norm": 0.1834481954574585, "learning_rate": 2.355310925654771e-06, "loss": 0.4325, "step": 10296 }, { "epoch": 2.6061756517337384, "grad_norm": 0.18445120751857758, "learning_rate": 2.3537676183838566e-06, "loss": 0.4505, "step": 10297 }, { "epoch": 2.6064287522146294, "grad_norm": 0.18724167346954346, "learning_rate": 2.3522247494552243e-06, "loss": 0.437, "step": 10298 }, { "epoch": 2.60668185269552, "grad_norm": 0.19078831374645233, "learning_rate": 2.3506823189573245e-06, "loss": 0.4498, "step": 10299 }, { "epoch": 2.606934953176411, "grad_norm": 0.18812112510204315, "learning_rate": 2.349140326978582e-06, "loss": 0.4458, "step": 10300 }, { "epoch": 2.607188053657302, "grad_norm": 0.193805992603302, "learning_rate": 2.3475987736073945e-06, "loss": 0.4894, "step": 10301 }, { "epoch": 2.607441154138193, "grad_norm": 0.1925317943096161, "learning_rate": 2.346057658932137e-06, "loss": 0.4548, "step": 10302 }, { "epoch": 2.6076942546190836, "grad_norm": 0.19114449620246887, "learning_rate": 2.3445169830411586e-06, "loss": 0.4565, "step": 10303 }, { "epoch": 2.6079473550999746, "grad_norm": 0.1917719841003418, "learning_rate": 2.3429767460227814e-06, "loss": 0.4482, "step": 10304 }, { "epoch": 2.6082004555808656, "grad_norm": 0.1850748509168625, "learning_rate": 2.3414369479653055e-06, "loss": 0.449, "step": 10305 }, { "epoch": 2.6084535560617566, "grad_norm": 0.18578961491584778, "learning_rate": 2.339897588957003e-06, "loss": 0.4307, "step": 10306 }, { "epoch": 2.6087066565426476, "grad_norm": 0.19176840782165527, "learning_rate": 2.3383586690861236e-06, "loss": 0.4321, "step": 10307 }, { "epoch": 2.6089597570235386, "grad_norm": 0.19462187588214874, "learning_rate": 2.3368201884408893e-06, "loss": 0.4662, "step": 10308 }, { "epoch": 2.609212857504429, "grad_norm": 0.18085961043834686, "learning_rate": 2.3352821471094977e-06, "loss": 0.4707, "step": 10309 }, { "epoch": 2.60946595798532, "grad_norm": 0.18668466806411743, "learning_rate": 2.3337445451801223e-06, "loss": 0.4523, "step": 10310 }, { "epoch": 2.609719058466211, "grad_norm": 0.20240633189678192, "learning_rate": 2.332207382740911e-06, "loss": 0.4307, "step": 10311 }, { "epoch": 2.609972158947102, "grad_norm": 0.1863846331834793, "learning_rate": 2.3306706598799833e-06, "loss": 0.4418, "step": 10312 }, { "epoch": 2.6102252594279927, "grad_norm": 0.18529362976551056, "learning_rate": 2.329134376685439e-06, "loss": 0.4639, "step": 10313 }, { "epoch": 2.6104783599088837, "grad_norm": 0.20537607371807098, "learning_rate": 2.32759853324535e-06, "loss": 0.4317, "step": 10314 }, { "epoch": 2.6107314603897747, "grad_norm": 0.18620352447032928, "learning_rate": 2.3260631296477642e-06, "loss": 0.4478, "step": 10315 }, { "epoch": 2.6109845608706657, "grad_norm": 0.18637652695178986, "learning_rate": 2.324528165980695e-06, "loss": 0.4497, "step": 10316 }, { "epoch": 2.6112376613515567, "grad_norm": 0.18168261647224426, "learning_rate": 2.3229936423321475e-06, "loss": 0.4694, "step": 10317 }, { "epoch": 2.6114907618324477, "grad_norm": 0.18423214554786682, "learning_rate": 2.321459558790089e-06, "loss": 0.4394, "step": 10318 }, { "epoch": 2.6117438623133387, "grad_norm": 0.18778377771377563, "learning_rate": 2.319925915442467e-06, "loss": 0.4453, "step": 10319 }, { "epoch": 2.611996962794229, "grad_norm": 0.19235606491565704, "learning_rate": 2.318392712377201e-06, "loss": 0.4649, "step": 10320 }, { "epoch": 2.61225006327512, "grad_norm": 0.18565265834331512, "learning_rate": 2.3168599496821886e-06, "loss": 0.4505, "step": 10321 }, { "epoch": 2.612503163756011, "grad_norm": 0.17871883511543274, "learning_rate": 2.3153276274452916e-06, "loss": 0.4595, "step": 10322 }, { "epoch": 2.612756264236902, "grad_norm": 0.18642260134220123, "learning_rate": 2.3137957457543636e-06, "loss": 0.4577, "step": 10323 }, { "epoch": 2.6130093647177928, "grad_norm": 0.18722660839557648, "learning_rate": 2.3122643046972216e-06, "loss": 0.4597, "step": 10324 }, { "epoch": 2.6132624651986838, "grad_norm": 0.18519721925258636, "learning_rate": 2.3107333043616588e-06, "loss": 0.4759, "step": 10325 }, { "epoch": 2.6135155656795748, "grad_norm": 0.18936105072498322, "learning_rate": 2.309202744835448e-06, "loss": 0.4585, "step": 10326 }, { "epoch": 2.6137686661604658, "grad_norm": 0.18707320094108582, "learning_rate": 2.3076726262063274e-06, "loss": 0.4698, "step": 10327 }, { "epoch": 2.6140217666413568, "grad_norm": 0.18250609934329987, "learning_rate": 2.306142948562018e-06, "loss": 0.4432, "step": 10328 }, { "epoch": 2.6142748671222478, "grad_norm": 0.18687699735164642, "learning_rate": 2.304613711990209e-06, "loss": 0.456, "step": 10329 }, { "epoch": 2.6145279676031383, "grad_norm": 0.18474933505058289, "learning_rate": 2.303084916578575e-06, "loss": 0.4377, "step": 10330 }, { "epoch": 2.6147810680840293, "grad_norm": 0.19420617818832397, "learning_rate": 2.3015565624147563e-06, "loss": 0.4425, "step": 10331 }, { "epoch": 2.6150341685649203, "grad_norm": 0.18316014111042023, "learning_rate": 2.300028649586372e-06, "loss": 0.4449, "step": 10332 }, { "epoch": 2.6152872690458113, "grad_norm": 0.1830713301897049, "learning_rate": 2.2985011781810096e-06, "loss": 0.444, "step": 10333 }, { "epoch": 2.615540369526702, "grad_norm": 0.18547508120536804, "learning_rate": 2.2969741482862374e-06, "loss": 0.4327, "step": 10334 }, { "epoch": 2.615793470007593, "grad_norm": 0.18311189115047455, "learning_rate": 2.2954475599895944e-06, "loss": 0.4263, "step": 10335 }, { "epoch": 2.616046570488484, "grad_norm": 0.19228988885879517, "learning_rate": 2.2939214133786024e-06, "loss": 0.4401, "step": 10336 }, { "epoch": 2.616299670969375, "grad_norm": 0.3362661898136139, "learning_rate": 2.292395708540752e-06, "loss": 0.4598, "step": 10337 }, { "epoch": 2.616552771450266, "grad_norm": 0.18769028782844543, "learning_rate": 2.2908704455635033e-06, "loss": 0.4575, "step": 10338 }, { "epoch": 2.616805871931157, "grad_norm": 0.18828965723514557, "learning_rate": 2.2893456245342992e-06, "loss": 0.4494, "step": 10339 }, { "epoch": 2.6170589724120474, "grad_norm": 0.1897481381893158, "learning_rate": 2.2878212455405546e-06, "loss": 0.4718, "step": 10340 }, { "epoch": 2.6173120728929384, "grad_norm": 0.18536335229873657, "learning_rate": 2.2862973086696583e-06, "loss": 0.4652, "step": 10341 }, { "epoch": 2.6175651733738294, "grad_norm": 0.1883520632982254, "learning_rate": 2.2847738140089716e-06, "loss": 0.456, "step": 10342 }, { "epoch": 2.6178182738547204, "grad_norm": 0.18364930152893066, "learning_rate": 2.283250761645841e-06, "loss": 0.4613, "step": 10343 }, { "epoch": 2.618071374335611, "grad_norm": 0.17975597083568573, "learning_rate": 2.2817281516675736e-06, "loss": 0.4498, "step": 10344 }, { "epoch": 2.618324474816502, "grad_norm": 0.18530865013599396, "learning_rate": 2.2802059841614588e-06, "loss": 0.4495, "step": 10345 }, { "epoch": 2.618577575297393, "grad_norm": 0.18612714111804962, "learning_rate": 2.2786842592147585e-06, "loss": 0.4359, "step": 10346 }, { "epoch": 2.618830675778284, "grad_norm": 0.18669569492340088, "learning_rate": 2.277162976914712e-06, "loss": 0.4309, "step": 10347 }, { "epoch": 2.619083776259175, "grad_norm": 0.18833819031715393, "learning_rate": 2.2756421373485292e-06, "loss": 0.4461, "step": 10348 }, { "epoch": 2.619336876740066, "grad_norm": 0.19399824738502502, "learning_rate": 2.274121740603398e-06, "loss": 0.4315, "step": 10349 }, { "epoch": 2.619589977220957, "grad_norm": 0.18176206946372986, "learning_rate": 2.2726017867664784e-06, "loss": 0.4728, "step": 10350 }, { "epoch": 2.6198430777018475, "grad_norm": 0.18082557618618011, "learning_rate": 2.2710822759249074e-06, "loss": 0.4529, "step": 10351 }, { "epoch": 2.6200961781827385, "grad_norm": 0.18439491093158722, "learning_rate": 2.2695632081657938e-06, "loss": 0.4542, "step": 10352 }, { "epoch": 2.6203492786636295, "grad_norm": 0.1837778389453888, "learning_rate": 2.268044583576223e-06, "loss": 0.4646, "step": 10353 }, { "epoch": 2.62060237914452, "grad_norm": 0.18541578948497772, "learning_rate": 2.2665264022432565e-06, "loss": 0.4485, "step": 10354 }, { "epoch": 2.620855479625411, "grad_norm": 0.18266992270946503, "learning_rate": 2.265008664253926e-06, "loss": 0.4566, "step": 10355 }, { "epoch": 2.621108580106302, "grad_norm": 0.1929391771554947, "learning_rate": 2.26349136969524e-06, "loss": 0.4775, "step": 10356 }, { "epoch": 2.621361680587193, "grad_norm": 0.18809925019741058, "learning_rate": 2.261974518654184e-06, "loss": 0.4619, "step": 10357 }, { "epoch": 2.621614781068084, "grad_norm": 0.2814454734325409, "learning_rate": 2.2604581112177137e-06, "loss": 0.4588, "step": 10358 }, { "epoch": 2.621867881548975, "grad_norm": 0.18191403150558472, "learning_rate": 2.2589421474727657e-06, "loss": 0.4703, "step": 10359 }, { "epoch": 2.622120982029866, "grad_norm": 0.18087829649448395, "learning_rate": 2.2574266275062385e-06, "loss": 0.4217, "step": 10360 }, { "epoch": 2.6223740825107567, "grad_norm": 0.18759635090827942, "learning_rate": 2.255911551405021e-06, "loss": 0.4447, "step": 10361 }, { "epoch": 2.6226271829916477, "grad_norm": 0.18260696530342102, "learning_rate": 2.2543969192559678e-06, "loss": 0.4346, "step": 10362 }, { "epoch": 2.6228802834725387, "grad_norm": 0.18543456494808197, "learning_rate": 2.252882731145909e-06, "loss": 0.4511, "step": 10363 }, { "epoch": 2.6231333839534297, "grad_norm": 0.3043903708457947, "learning_rate": 2.2513689871616486e-06, "loss": 0.4504, "step": 10364 }, { "epoch": 2.62338648443432, "grad_norm": 0.18946559727191925, "learning_rate": 2.249855687389971e-06, "loss": 0.4438, "step": 10365 }, { "epoch": 2.623639584915211, "grad_norm": 0.1922096461057663, "learning_rate": 2.2483428319176215e-06, "loss": 0.4513, "step": 10366 }, { "epoch": 2.623892685396102, "grad_norm": 0.18653260171413422, "learning_rate": 2.246830420831336e-06, "loss": 0.4621, "step": 10367 }, { "epoch": 2.624145785876993, "grad_norm": 0.18864043056964874, "learning_rate": 2.2453184542178176e-06, "loss": 0.4266, "step": 10368 }, { "epoch": 2.624398886357884, "grad_norm": 0.19035187363624573, "learning_rate": 2.2438069321637414e-06, "loss": 0.4383, "step": 10369 }, { "epoch": 2.624651986838775, "grad_norm": 0.19361019134521484, "learning_rate": 2.242295854755765e-06, "loss": 0.4388, "step": 10370 }, { "epoch": 2.6249050873196658, "grad_norm": 0.19067806005477905, "learning_rate": 2.2407852220805073e-06, "loss": 0.4486, "step": 10371 }, { "epoch": 2.6251581878005568, "grad_norm": 0.18470309674739838, "learning_rate": 2.239275034224574e-06, "loss": 0.4503, "step": 10372 }, { "epoch": 2.6254112882814478, "grad_norm": 0.18492670357227325, "learning_rate": 2.2377652912745386e-06, "loss": 0.4441, "step": 10373 }, { "epoch": 2.6256643887623388, "grad_norm": 0.19197869300842285, "learning_rate": 2.236255993316956e-06, "loss": 0.4539, "step": 10374 }, { "epoch": 2.6259174892432293, "grad_norm": 0.21229954063892365, "learning_rate": 2.234747140438348e-06, "loss": 0.4522, "step": 10375 }, { "epoch": 2.6261705897241203, "grad_norm": 0.18811412155628204, "learning_rate": 2.233238732725217e-06, "loss": 0.4573, "step": 10376 }, { "epoch": 2.6264236902050113, "grad_norm": 0.19111303985118866, "learning_rate": 2.2317307702640322e-06, "loss": 0.4296, "step": 10377 }, { "epoch": 2.6266767906859023, "grad_norm": 0.18459370732307434, "learning_rate": 2.230223253141244e-06, "loss": 0.4486, "step": 10378 }, { "epoch": 2.6269298911667933, "grad_norm": 0.18178734183311462, "learning_rate": 2.228716181443271e-06, "loss": 0.4381, "step": 10379 }, { "epoch": 2.6271829916476843, "grad_norm": 0.18615145981311798, "learning_rate": 2.227209555256519e-06, "loss": 0.4673, "step": 10380 }, { "epoch": 2.6274360921285753, "grad_norm": 0.19300027191638947, "learning_rate": 2.225703374667354e-06, "loss": 0.4566, "step": 10381 }, { "epoch": 2.627689192609466, "grad_norm": 0.1859292834997177, "learning_rate": 2.2241976397621266e-06, "loss": 0.4622, "step": 10382 }, { "epoch": 2.627942293090357, "grad_norm": 0.18927820026874542, "learning_rate": 2.2226923506271516e-06, "loss": 0.4641, "step": 10383 }, { "epoch": 2.628195393571248, "grad_norm": 0.18265004456043243, "learning_rate": 2.2211875073487265e-06, "loss": 0.4468, "step": 10384 }, { "epoch": 2.6284484940521384, "grad_norm": 0.1826864629983902, "learning_rate": 2.21968311001312e-06, "loss": 0.4579, "step": 10385 }, { "epoch": 2.6287015945330294, "grad_norm": 0.18795183300971985, "learning_rate": 2.2181791587065748e-06, "loss": 0.4791, "step": 10386 }, { "epoch": 2.6289546950139204, "grad_norm": 0.18580661714076996, "learning_rate": 2.216675653515318e-06, "loss": 0.4546, "step": 10387 }, { "epoch": 2.6292077954948114, "grad_norm": 0.1899155229330063, "learning_rate": 2.215172594525531e-06, "loss": 0.4658, "step": 10388 }, { "epoch": 2.6294608959757024, "grad_norm": 0.190522700548172, "learning_rate": 2.2136699818233863e-06, "loss": 0.4476, "step": 10389 }, { "epoch": 2.6297139964565934, "grad_norm": 0.18267682194709778, "learning_rate": 2.2121678154950255e-06, "loss": 0.4529, "step": 10390 }, { "epoch": 2.6299670969374844, "grad_norm": 0.18738071620464325, "learning_rate": 2.2106660956265623e-06, "loss": 0.4734, "step": 10391 }, { "epoch": 2.630220197418375, "grad_norm": 0.1800118237733841, "learning_rate": 2.209164822304091e-06, "loss": 0.4272, "step": 10392 }, { "epoch": 2.630473297899266, "grad_norm": 0.18674883246421814, "learning_rate": 2.2076639956136724e-06, "loss": 0.4339, "step": 10393 }, { "epoch": 2.630726398380157, "grad_norm": 0.18007346987724304, "learning_rate": 2.206163615641348e-06, "loss": 0.4312, "step": 10394 }, { "epoch": 2.630979498861048, "grad_norm": 0.1826339215040207, "learning_rate": 2.2046636824731314e-06, "loss": 0.4426, "step": 10395 }, { "epoch": 2.6312325993419385, "grad_norm": 0.18515613675117493, "learning_rate": 2.203164196195009e-06, "loss": 0.4547, "step": 10396 }, { "epoch": 2.6314856998228295, "grad_norm": 0.18370597064495087, "learning_rate": 2.201665156892945e-06, "loss": 0.4292, "step": 10397 }, { "epoch": 2.6317388003037205, "grad_norm": 0.19577202200889587, "learning_rate": 2.200166564652876e-06, "loss": 0.4389, "step": 10398 }, { "epoch": 2.6319919007846115, "grad_norm": 0.18325668573379517, "learning_rate": 2.198668419560711e-06, "loss": 0.4497, "step": 10399 }, { "epoch": 2.6322450012655025, "grad_norm": 0.18138711154460907, "learning_rate": 2.1971707217023384e-06, "loss": 0.4283, "step": 10400 }, { "epoch": 2.6324981017463935, "grad_norm": 0.22044752538204193, "learning_rate": 2.1956734711636163e-06, "loss": 0.4182, "step": 10401 }, { "epoch": 2.632751202227284, "grad_norm": 0.183938130736351, "learning_rate": 2.194176668030379e-06, "loss": 0.4168, "step": 10402 }, { "epoch": 2.633004302708175, "grad_norm": 0.1923074573278427, "learning_rate": 2.1926803123884344e-06, "loss": 0.4718, "step": 10403 }, { "epoch": 2.633257403189066, "grad_norm": 0.18494780361652374, "learning_rate": 2.1911844043235675e-06, "loss": 0.4236, "step": 10404 }, { "epoch": 2.633510503669957, "grad_norm": 0.18293128907680511, "learning_rate": 2.189688943921534e-06, "loss": 0.4273, "step": 10405 }, { "epoch": 2.6337636041508476, "grad_norm": 0.18751125037670135, "learning_rate": 2.1881939312680654e-06, "loss": 0.4496, "step": 10406 }, { "epoch": 2.6340167046317386, "grad_norm": 0.18582367897033691, "learning_rate": 2.186699366448869e-06, "loss": 0.4388, "step": 10407 }, { "epoch": 2.6342698051126296, "grad_norm": 0.188422292470932, "learning_rate": 2.1852052495496225e-06, "loss": 0.4272, "step": 10408 }, { "epoch": 2.6345229055935206, "grad_norm": 0.1923607736825943, "learning_rate": 2.183711580655986e-06, "loss": 0.4722, "step": 10409 }, { "epoch": 2.6347760060744116, "grad_norm": 0.19846676290035248, "learning_rate": 2.1822183598535782e-06, "loss": 0.4652, "step": 10410 }, { "epoch": 2.6350291065553026, "grad_norm": 0.19231678545475006, "learning_rate": 2.180725587228011e-06, "loss": 0.4594, "step": 10411 }, { "epoch": 2.635282207036193, "grad_norm": 0.18891753256320953, "learning_rate": 2.179233262864859e-06, "loss": 0.4346, "step": 10412 }, { "epoch": 2.635535307517084, "grad_norm": 0.18716563284397125, "learning_rate": 2.1777413868496756e-06, "loss": 0.4179, "step": 10413 }, { "epoch": 2.635788407997975, "grad_norm": 0.18639901280403137, "learning_rate": 2.1762499592679844e-06, "loss": 0.4585, "step": 10414 }, { "epoch": 2.636041508478866, "grad_norm": 0.1874418556690216, "learning_rate": 2.1747589802052905e-06, "loss": 0.47, "step": 10415 }, { "epoch": 2.6362946089597568, "grad_norm": 0.18160401284694672, "learning_rate": 2.1732684497470625e-06, "loss": 0.4432, "step": 10416 }, { "epoch": 2.6365477094406478, "grad_norm": 0.1885703057050705, "learning_rate": 2.1717783679787475e-06, "loss": 0.4808, "step": 10417 }, { "epoch": 2.6368008099215388, "grad_norm": 0.18800115585327148, "learning_rate": 2.170288734985777e-06, "loss": 0.4427, "step": 10418 }, { "epoch": 2.6370539104024298, "grad_norm": 0.19018523395061493, "learning_rate": 2.1687995508535455e-06, "loss": 0.452, "step": 10419 }, { "epoch": 2.6373070108833208, "grad_norm": 0.19008518755435944, "learning_rate": 2.167310815667425e-06, "loss": 0.4601, "step": 10420 }, { "epoch": 2.6375601113642118, "grad_norm": 0.19687116146087646, "learning_rate": 2.1658225295127577e-06, "loss": 0.4216, "step": 10421 }, { "epoch": 2.6378132118451028, "grad_norm": 0.18159854412078857, "learning_rate": 2.1643346924748677e-06, "loss": 0.4447, "step": 10422 }, { "epoch": 2.6380663123259933, "grad_norm": 0.1884097158908844, "learning_rate": 2.1628473046390473e-06, "loss": 0.4457, "step": 10423 }, { "epoch": 2.6383194128068843, "grad_norm": 0.2009223997592926, "learning_rate": 2.1613603660905645e-06, "loss": 0.4492, "step": 10424 }, { "epoch": 2.6385725132877753, "grad_norm": 0.1892174929380417, "learning_rate": 2.1598738769146676e-06, "loss": 0.4591, "step": 10425 }, { "epoch": 2.6388256137686663, "grad_norm": 0.18635790050029755, "learning_rate": 2.1583878371965716e-06, "loss": 0.4304, "step": 10426 }, { "epoch": 2.639078714249557, "grad_norm": 0.18739940226078033, "learning_rate": 2.156902247021465e-06, "loss": 0.4882, "step": 10427 }, { "epoch": 2.639331814730448, "grad_norm": 0.1968119889497757, "learning_rate": 2.1554171064745156e-06, "loss": 0.4469, "step": 10428 }, { "epoch": 2.639584915211339, "grad_norm": 0.190376877784729, "learning_rate": 2.1539324156408636e-06, "loss": 0.4216, "step": 10429 }, { "epoch": 2.63983801569223, "grad_norm": 0.19530151784420013, "learning_rate": 2.1524481746056192e-06, "loss": 0.4778, "step": 10430 }, { "epoch": 2.640091116173121, "grad_norm": 0.18951860070228577, "learning_rate": 2.150964383453881e-06, "loss": 0.4519, "step": 10431 }, { "epoch": 2.640344216654012, "grad_norm": 0.18834136426448822, "learning_rate": 2.1494810422707003e-06, "loss": 0.4706, "step": 10432 }, { "epoch": 2.6405973171349024, "grad_norm": 0.188601553440094, "learning_rate": 2.1479981511411197e-06, "loss": 0.4485, "step": 10433 }, { "epoch": 2.6408504176157934, "grad_norm": 0.18956761062145233, "learning_rate": 2.14651571015015e-06, "loss": 0.4629, "step": 10434 }, { "epoch": 2.6411035180966844, "grad_norm": 0.18375292420387268, "learning_rate": 2.145033719382774e-06, "loss": 0.464, "step": 10435 }, { "epoch": 2.6413566185775754, "grad_norm": 0.19505518674850464, "learning_rate": 2.1435521789239533e-06, "loss": 0.4776, "step": 10436 }, { "epoch": 2.641609719058466, "grad_norm": 0.19023889303207397, "learning_rate": 2.1420710888586203e-06, "loss": 0.488, "step": 10437 }, { "epoch": 2.641862819539357, "grad_norm": 0.19477581977844238, "learning_rate": 2.1405904492716844e-06, "loss": 0.4649, "step": 10438 }, { "epoch": 2.642115920020248, "grad_norm": 0.18926884233951569, "learning_rate": 2.139110260248025e-06, "loss": 0.4621, "step": 10439 }, { "epoch": 2.642369020501139, "grad_norm": 0.1912250816822052, "learning_rate": 2.1376305218725e-06, "loss": 0.4328, "step": 10440 }, { "epoch": 2.64262212098203, "grad_norm": 0.20307134091854095, "learning_rate": 2.1361512342299397e-06, "loss": 0.4479, "step": 10441 }, { "epoch": 2.642875221462921, "grad_norm": 0.18979570269584656, "learning_rate": 2.1346723974051462e-06, "loss": 0.4487, "step": 10442 }, { "epoch": 2.6431283219438115, "grad_norm": 0.18328888714313507, "learning_rate": 2.133194011482902e-06, "loss": 0.4509, "step": 10443 }, { "epoch": 2.6433814224247025, "grad_norm": 0.19388948380947113, "learning_rate": 2.131716076547957e-06, "loss": 0.4553, "step": 10444 }, { "epoch": 2.6436345229055935, "grad_norm": 0.20554785430431366, "learning_rate": 2.130238592685039e-06, "loss": 0.4673, "step": 10445 }, { "epoch": 2.6438876233864845, "grad_norm": 0.18701022863388062, "learning_rate": 2.128761559978848e-06, "loss": 0.457, "step": 10446 }, { "epoch": 2.644140723867375, "grad_norm": 0.18875841796398163, "learning_rate": 2.1272849785140604e-06, "loss": 0.438, "step": 10447 }, { "epoch": 2.644393824348266, "grad_norm": 0.1846994161605835, "learning_rate": 2.125808848375325e-06, "loss": 0.4345, "step": 10448 }, { "epoch": 2.644646924829157, "grad_norm": 0.18967366218566895, "learning_rate": 2.1243331696472645e-06, "loss": 0.4572, "step": 10449 }, { "epoch": 2.644900025310048, "grad_norm": 0.18977482616901398, "learning_rate": 2.122857942414479e-06, "loss": 0.4671, "step": 10450 }, { "epoch": 2.645153125790939, "grad_norm": 0.18443302810192108, "learning_rate": 2.121383166761537e-06, "loss": 0.4342, "step": 10451 }, { "epoch": 2.64540622627183, "grad_norm": 0.19114866852760315, "learning_rate": 2.1199088427729852e-06, "loss": 0.4729, "step": 10452 }, { "epoch": 2.645659326752721, "grad_norm": 0.18636710941791534, "learning_rate": 2.1184349705333452e-06, "loss": 0.4644, "step": 10453 }, { "epoch": 2.6459124272336116, "grad_norm": 0.18908074498176575, "learning_rate": 2.116961550127111e-06, "loss": 0.4494, "step": 10454 }, { "epoch": 2.6461655277145026, "grad_norm": 0.18617337942123413, "learning_rate": 2.115488581638745e-06, "loss": 0.4335, "step": 10455 }, { "epoch": 2.6464186281953936, "grad_norm": 0.1876223087310791, "learning_rate": 2.1140160651526965e-06, "loss": 0.4573, "step": 10456 }, { "epoch": 2.6466717286762846, "grad_norm": 0.183118537068367, "learning_rate": 2.1125440007533783e-06, "loss": 0.4357, "step": 10457 }, { "epoch": 2.646924829157175, "grad_norm": 0.19024737179279327, "learning_rate": 2.1110723885251805e-06, "loss": 0.4809, "step": 10458 }, { "epoch": 2.647177929638066, "grad_norm": 0.18716923892498016, "learning_rate": 2.1096012285524725e-06, "loss": 0.4425, "step": 10459 }, { "epoch": 2.647431030118957, "grad_norm": 0.18890653550624847, "learning_rate": 2.1081305209195847e-06, "loss": 0.4375, "step": 10460 }, { "epoch": 2.647684130599848, "grad_norm": 0.1919790506362915, "learning_rate": 2.1066602657108316e-06, "loss": 0.446, "step": 10461 }, { "epoch": 2.647937231080739, "grad_norm": 0.18537771701812744, "learning_rate": 2.1051904630105045e-06, "loss": 0.4496, "step": 10462 }, { "epoch": 2.64819033156163, "grad_norm": 0.18746891617774963, "learning_rate": 2.103721112902861e-06, "loss": 0.468, "step": 10463 }, { "epoch": 2.6484434320425208, "grad_norm": 0.18505781888961792, "learning_rate": 2.102252215472136e-06, "loss": 0.4412, "step": 10464 }, { "epoch": 2.6486965325234118, "grad_norm": 0.19373415410518646, "learning_rate": 2.1007837708025413e-06, "loss": 0.4246, "step": 10465 }, { "epoch": 2.6489496330043028, "grad_norm": 0.1869661509990692, "learning_rate": 2.0993157789782547e-06, "loss": 0.476, "step": 10466 }, { "epoch": 2.6492027334851938, "grad_norm": 0.18246600031852722, "learning_rate": 2.0978482400834354e-06, "loss": 0.4436, "step": 10467 }, { "epoch": 2.6494558339660843, "grad_norm": 0.18358756601810455, "learning_rate": 2.0963811542022105e-06, "loss": 0.4509, "step": 10468 }, { "epoch": 2.6497089344469753, "grad_norm": 0.18833380937576294, "learning_rate": 2.094914521418692e-06, "loss": 0.4369, "step": 10469 }, { "epoch": 2.6499620349278663, "grad_norm": 0.1820700466632843, "learning_rate": 2.093448341816957e-06, "loss": 0.4589, "step": 10470 }, { "epoch": 2.6502151354087573, "grad_norm": 0.1864461600780487, "learning_rate": 2.091982615481055e-06, "loss": 0.447, "step": 10471 }, { "epoch": 2.6504682358896483, "grad_norm": 0.18550263345241547, "learning_rate": 2.0905173424950143e-06, "loss": 0.4505, "step": 10472 }, { "epoch": 2.6507213363705393, "grad_norm": 0.18887843191623688, "learning_rate": 2.089052522942837e-06, "loss": 0.457, "step": 10473 }, { "epoch": 2.65097443685143, "grad_norm": 0.18904559314250946, "learning_rate": 2.0875881569084945e-06, "loss": 0.4639, "step": 10474 }, { "epoch": 2.651227537332321, "grad_norm": 0.18879492580890656, "learning_rate": 2.0861242444759424e-06, "loss": 0.4606, "step": 10475 }, { "epoch": 2.651480637813212, "grad_norm": 0.18804670870304108, "learning_rate": 2.084660785729101e-06, "loss": 0.4775, "step": 10476 }, { "epoch": 2.651733738294103, "grad_norm": 0.19153741002082825, "learning_rate": 2.0831977807518644e-06, "loss": 0.4411, "step": 10477 }, { "epoch": 2.6519868387749934, "grad_norm": 0.18396133184432983, "learning_rate": 2.081735229628107e-06, "loss": 0.4438, "step": 10478 }, { "epoch": 2.6522399392558844, "grad_norm": 0.1818324625492096, "learning_rate": 2.0802731324416703e-06, "loss": 0.4469, "step": 10479 }, { "epoch": 2.6524930397367754, "grad_norm": 0.185378760099411, "learning_rate": 2.0788114892763767e-06, "loss": 0.443, "step": 10480 }, { "epoch": 2.6527461402176664, "grad_norm": 0.18762776255607605, "learning_rate": 2.0773503002160166e-06, "loss": 0.4484, "step": 10481 }, { "epoch": 2.6529992406985574, "grad_norm": 0.18714913725852966, "learning_rate": 2.0758895653443588e-06, "loss": 0.4649, "step": 10482 }, { "epoch": 2.6532523411794484, "grad_norm": 0.18257376551628113, "learning_rate": 2.074429284745143e-06, "loss": 0.4251, "step": 10483 }, { "epoch": 2.6535054416603394, "grad_norm": 0.19056294858455658, "learning_rate": 2.0729694585020833e-06, "loss": 0.4623, "step": 10484 }, { "epoch": 2.65375854214123, "grad_norm": 0.1876656711101532, "learning_rate": 2.0715100866988703e-06, "loss": 0.4554, "step": 10485 }, { "epoch": 2.654011642622121, "grad_norm": 0.18835343420505524, "learning_rate": 2.070051169419166e-06, "loss": 0.4336, "step": 10486 }, { "epoch": 2.654264743103012, "grad_norm": 0.1834125816822052, "learning_rate": 2.068592706746606e-06, "loss": 0.4552, "step": 10487 }, { "epoch": 2.654517843583903, "grad_norm": 0.18344804644584656, "learning_rate": 2.067134698764802e-06, "loss": 0.4251, "step": 10488 }, { "epoch": 2.6547709440647935, "grad_norm": 0.18564271926879883, "learning_rate": 2.0656771455573388e-06, "loss": 0.4436, "step": 10489 }, { "epoch": 2.6550240445456845, "grad_norm": 0.18636174499988556, "learning_rate": 2.0642200472077734e-06, "loss": 0.4478, "step": 10490 }, { "epoch": 2.6552771450265755, "grad_norm": 0.19045548141002655, "learning_rate": 2.0627634037996392e-06, "loss": 0.4614, "step": 10491 }, { "epoch": 2.6555302455074665, "grad_norm": 0.18915608525276184, "learning_rate": 2.0613072154164426e-06, "loss": 0.455, "step": 10492 }, { "epoch": 2.6557833459883575, "grad_norm": 0.1819225549697876, "learning_rate": 2.0598514821416637e-06, "loss": 0.4628, "step": 10493 }, { "epoch": 2.6560364464692485, "grad_norm": 0.18817004561424255, "learning_rate": 2.0583962040587556e-06, "loss": 0.4712, "step": 10494 }, { "epoch": 2.656289546950139, "grad_norm": 0.18520338833332062, "learning_rate": 2.056941381251147e-06, "loss": 0.463, "step": 10495 }, { "epoch": 2.65654264743103, "grad_norm": 0.19071140885353088, "learning_rate": 2.05548701380224e-06, "loss": 0.4574, "step": 10496 }, { "epoch": 2.656795747911921, "grad_norm": 0.19759893417358398, "learning_rate": 2.054033101795411e-06, "loss": 0.4405, "step": 10497 }, { "epoch": 2.657048848392812, "grad_norm": 0.20032234489917755, "learning_rate": 2.052579645314011e-06, "loss": 0.4397, "step": 10498 }, { "epoch": 2.6573019488737026, "grad_norm": 0.1882588267326355, "learning_rate": 2.0511266444413556e-06, "loss": 0.4404, "step": 10499 }, { "epoch": 2.6575550493545936, "grad_norm": 0.18509791791439056, "learning_rate": 2.049674099260752e-06, "loss": 0.472, "step": 10500 }, { "epoch": 2.6578081498354846, "grad_norm": 0.18729494512081146, "learning_rate": 2.048222009855467e-06, "loss": 0.4528, "step": 10501 }, { "epoch": 2.6580612503163756, "grad_norm": 0.19367121160030365, "learning_rate": 2.0467703763087466e-06, "loss": 0.4574, "step": 10502 }, { "epoch": 2.6583143507972666, "grad_norm": 0.18431958556175232, "learning_rate": 2.04531919870381e-06, "loss": 0.4367, "step": 10503 }, { "epoch": 2.6585674512781576, "grad_norm": 0.19103029370307922, "learning_rate": 2.0438684771238527e-06, "loss": 0.4729, "step": 10504 }, { "epoch": 2.658820551759048, "grad_norm": 0.1979900449514389, "learning_rate": 2.042418211652033e-06, "loss": 0.4647, "step": 10505 }, { "epoch": 2.659073652239939, "grad_norm": 0.183732271194458, "learning_rate": 2.0409684023715006e-06, "loss": 0.45, "step": 10506 }, { "epoch": 2.65932675272083, "grad_norm": 0.18897786736488342, "learning_rate": 2.0395190493653648e-06, "loss": 0.4431, "step": 10507 }, { "epoch": 2.659579853201721, "grad_norm": 0.19145330786705017, "learning_rate": 2.038070152716718e-06, "loss": 0.4547, "step": 10508 }, { "epoch": 2.6598329536826117, "grad_norm": 0.19214819371700287, "learning_rate": 2.036621712508622e-06, "loss": 0.4681, "step": 10509 }, { "epoch": 2.6600860541635027, "grad_norm": 0.19569577276706696, "learning_rate": 2.035173728824108e-06, "loss": 0.434, "step": 10510 }, { "epoch": 2.6603391546443937, "grad_norm": 0.18672679364681244, "learning_rate": 2.0337262017461888e-06, "loss": 0.4401, "step": 10511 }, { "epoch": 2.6605922551252847, "grad_norm": 0.18417075276374817, "learning_rate": 2.0322791313578462e-06, "loss": 0.4269, "step": 10512 }, { "epoch": 2.6608453556061757, "grad_norm": 0.1828157603740692, "learning_rate": 2.030832517742041e-06, "loss": 0.4612, "step": 10513 }, { "epoch": 2.6610984560870667, "grad_norm": 0.1860661506652832, "learning_rate": 2.0293863609817032e-06, "loss": 0.4502, "step": 10514 }, { "epoch": 2.6613515565679577, "grad_norm": 0.1971128284931183, "learning_rate": 2.0279406611597408e-06, "loss": 0.4595, "step": 10515 }, { "epoch": 2.6616046570488483, "grad_norm": 0.18877097964286804, "learning_rate": 2.026495418359027e-06, "loss": 0.4288, "step": 10516 }, { "epoch": 2.6618577575297393, "grad_norm": 0.18913856148719788, "learning_rate": 2.0250506326624174e-06, "loss": 0.4355, "step": 10517 }, { "epoch": 2.6621108580106303, "grad_norm": 0.18601065874099731, "learning_rate": 2.0236063041527353e-06, "loss": 0.4577, "step": 10518 }, { "epoch": 2.6623639584915213, "grad_norm": 0.19315260648727417, "learning_rate": 2.0221624329127864e-06, "loss": 0.4482, "step": 10519 }, { "epoch": 2.662617058972412, "grad_norm": 0.18496465682983398, "learning_rate": 2.020719019025346e-06, "loss": 0.4637, "step": 10520 }, { "epoch": 2.662870159453303, "grad_norm": 0.18923160433769226, "learning_rate": 2.0192760625731534e-06, "loss": 0.4442, "step": 10521 }, { "epoch": 2.663123259934194, "grad_norm": 0.18704251945018768, "learning_rate": 2.0178335636389367e-06, "loss": 0.4493, "step": 10522 }, { "epoch": 2.663376360415085, "grad_norm": 0.1859833151102066, "learning_rate": 2.0163915223053887e-06, "loss": 0.4246, "step": 10523 }, { "epoch": 2.663629460895976, "grad_norm": 0.19532644748687744, "learning_rate": 2.014949938655181e-06, "loss": 0.4485, "step": 10524 }, { "epoch": 2.663882561376867, "grad_norm": 0.18831630051136017, "learning_rate": 2.0135088127709512e-06, "loss": 0.4581, "step": 10525 }, { "epoch": 2.6641356618577574, "grad_norm": 0.18922466039657593, "learning_rate": 2.0120681447353252e-06, "loss": 0.4411, "step": 10526 }, { "epoch": 2.6643887623386484, "grad_norm": 0.19991092383861542, "learning_rate": 2.0106279346308864e-06, "loss": 0.4573, "step": 10527 }, { "epoch": 2.6646418628195394, "grad_norm": 0.1887381225824356, "learning_rate": 2.0091881825402006e-06, "loss": 0.4644, "step": 10528 }, { "epoch": 2.6648949633004304, "grad_norm": 0.18610642850399017, "learning_rate": 2.0077488885458053e-06, "loss": 0.4464, "step": 10529 }, { "epoch": 2.665148063781321, "grad_norm": 0.1853058934211731, "learning_rate": 2.0063100527302125e-06, "loss": 0.4555, "step": 10530 }, { "epoch": 2.665401164262212, "grad_norm": 0.19489552080631256, "learning_rate": 2.004871675175908e-06, "loss": 0.4556, "step": 10531 }, { "epoch": 2.665654264743103, "grad_norm": 0.1866457611322403, "learning_rate": 2.0034337559653506e-06, "loss": 0.4383, "step": 10532 }, { "epoch": 2.665907365223994, "grad_norm": 0.18839052319526672, "learning_rate": 2.0019962951809735e-06, "loss": 0.4769, "step": 10533 }, { "epoch": 2.666160465704885, "grad_norm": 0.1864476203918457, "learning_rate": 2.000559292905183e-06, "loss": 0.445, "step": 10534 }, { "epoch": 2.666413566185776, "grad_norm": 0.1864781379699707, "learning_rate": 1.99912274922036e-06, "loss": 0.411, "step": 10535 }, { "epoch": 2.6666666666666665, "grad_norm": 0.18802028894424438, "learning_rate": 1.997686664208858e-06, "loss": 0.4596, "step": 10536 }, { "epoch": 2.6669197671475575, "grad_norm": 0.18760772049427032, "learning_rate": 1.996251037953003e-06, "loss": 0.4369, "step": 10537 }, { "epoch": 2.6671728676284485, "grad_norm": 0.1831413060426712, "learning_rate": 1.9948158705350996e-06, "loss": 0.4439, "step": 10538 }, { "epoch": 2.6674259681093395, "grad_norm": 0.18801049888134003, "learning_rate": 1.993381162037421e-06, "loss": 0.4443, "step": 10539 }, { "epoch": 2.66767906859023, "grad_norm": 0.18600329756736755, "learning_rate": 1.9919469125422174e-06, "loss": 0.4508, "step": 10540 }, { "epoch": 2.667932169071121, "grad_norm": 0.18912816047668457, "learning_rate": 1.9905131221317088e-06, "loss": 0.4478, "step": 10541 }, { "epoch": 2.668185269552012, "grad_norm": 0.1800682246685028, "learning_rate": 1.9890797908880966e-06, "loss": 0.4535, "step": 10542 }, { "epoch": 2.668438370032903, "grad_norm": 0.18587249517440796, "learning_rate": 1.987646918893541e-06, "loss": 0.4517, "step": 10543 }, { "epoch": 2.668691470513794, "grad_norm": 0.18285077810287476, "learning_rate": 1.986214506230194e-06, "loss": 0.4482, "step": 10544 }, { "epoch": 2.668944570994685, "grad_norm": 0.1827673614025116, "learning_rate": 1.9847825529801703e-06, "loss": 0.4248, "step": 10545 }, { "epoch": 2.669197671475576, "grad_norm": 0.18859384953975677, "learning_rate": 1.983351059225561e-06, "loss": 0.4047, "step": 10546 }, { "epoch": 2.6694507719564666, "grad_norm": 0.18704228103160858, "learning_rate": 1.9819200250484293e-06, "loss": 0.4339, "step": 10547 }, { "epoch": 2.6697038724373576, "grad_norm": 0.18823346495628357, "learning_rate": 1.980489450530818e-06, "loss": 0.467, "step": 10548 }, { "epoch": 2.6699569729182486, "grad_norm": 0.18563877046108246, "learning_rate": 1.9790593357547294e-06, "loss": 0.4591, "step": 10549 }, { "epoch": 2.670210073399139, "grad_norm": 0.19367140531539917, "learning_rate": 1.9776296808021567e-06, "loss": 0.4695, "step": 10550 }, { "epoch": 2.67046317388003, "grad_norm": 0.19865545630455017, "learning_rate": 1.9762004857550576e-06, "loss": 0.4249, "step": 10551 }, { "epoch": 2.670716274360921, "grad_norm": 0.19091789424419403, "learning_rate": 1.9747717506953644e-06, "loss": 0.4677, "step": 10552 }, { "epoch": 2.670969374841812, "grad_norm": 0.18652458488941193, "learning_rate": 1.9733434757049864e-06, "loss": 0.4581, "step": 10553 }, { "epoch": 2.671222475322703, "grad_norm": 0.19152304530143738, "learning_rate": 1.971915660865796e-06, "loss": 0.4677, "step": 10554 }, { "epoch": 2.671475575803594, "grad_norm": 0.1881839632987976, "learning_rate": 1.9704883062596526e-06, "loss": 0.4602, "step": 10555 }, { "epoch": 2.671728676284485, "grad_norm": 0.19120202958583832, "learning_rate": 1.969061411968379e-06, "loss": 0.4331, "step": 10556 }, { "epoch": 2.6719817767653757, "grad_norm": 0.20156744122505188, "learning_rate": 1.9676349780737826e-06, "loss": 0.4693, "step": 10557 }, { "epoch": 2.6722348772462667, "grad_norm": 0.19229404628276825, "learning_rate": 1.9662090046576334e-06, "loss": 0.4577, "step": 10558 }, { "epoch": 2.6724879777271577, "grad_norm": 0.1860230565071106, "learning_rate": 1.964783491801683e-06, "loss": 0.4225, "step": 10559 }, { "epoch": 2.6727410782080487, "grad_norm": 0.18517157435417175, "learning_rate": 1.963358439587648e-06, "loss": 0.4541, "step": 10560 }, { "epoch": 2.6729941786889393, "grad_norm": 0.1844562441110611, "learning_rate": 1.961933848097226e-06, "loss": 0.4413, "step": 10561 }, { "epoch": 2.6732472791698303, "grad_norm": 0.19236545264720917, "learning_rate": 1.960509717412086e-06, "loss": 0.436, "step": 10562 }, { "epoch": 2.6735003796507213, "grad_norm": 0.1895384043455124, "learning_rate": 1.959086047613867e-06, "loss": 0.4421, "step": 10563 }, { "epoch": 2.6737534801316123, "grad_norm": 0.18467368185520172, "learning_rate": 1.9576628387841933e-06, "loss": 0.4254, "step": 10564 }, { "epoch": 2.6740065806125033, "grad_norm": 0.1849248707294464, "learning_rate": 1.9562400910046475e-06, "loss": 0.4243, "step": 10565 }, { "epoch": 2.6742596810933943, "grad_norm": 0.18666693568229675, "learning_rate": 1.9548178043567945e-06, "loss": 0.4372, "step": 10566 }, { "epoch": 2.674512781574285, "grad_norm": 0.18622373044490814, "learning_rate": 1.95339597892217e-06, "loss": 0.4345, "step": 10567 }, { "epoch": 2.674765882055176, "grad_norm": 0.1847359836101532, "learning_rate": 1.951974614782286e-06, "loss": 0.4561, "step": 10568 }, { "epoch": 2.675018982536067, "grad_norm": 0.1873624175786972, "learning_rate": 1.9505537120186223e-06, "loss": 0.4492, "step": 10569 }, { "epoch": 2.675272083016958, "grad_norm": 0.19310005009174347, "learning_rate": 1.9491332707126455e-06, "loss": 0.4334, "step": 10570 }, { "epoch": 2.6755251834978484, "grad_norm": 0.19995327293872833, "learning_rate": 1.9477132909457772e-06, "loss": 0.4244, "step": 10571 }, { "epoch": 2.6757782839787394, "grad_norm": 0.1862015575170517, "learning_rate": 1.9462937727994256e-06, "loss": 0.4388, "step": 10572 }, { "epoch": 2.6760313844596304, "grad_norm": 0.18318325281143188, "learning_rate": 1.9448747163549673e-06, "loss": 0.4406, "step": 10573 }, { "epoch": 2.6762844849405214, "grad_norm": 0.18875563144683838, "learning_rate": 1.9434561216937552e-06, "loss": 0.4624, "step": 10574 }, { "epoch": 2.6765375854214124, "grad_norm": 0.19055268168449402, "learning_rate": 1.942037988897114e-06, "loss": 0.4652, "step": 10575 }, { "epoch": 2.6767906859023034, "grad_norm": 0.18865063786506653, "learning_rate": 1.940620318046341e-06, "loss": 0.4663, "step": 10576 }, { "epoch": 2.6770437863831944, "grad_norm": 0.18990862369537354, "learning_rate": 1.9392031092227105e-06, "loss": 0.4696, "step": 10577 }, { "epoch": 2.677296886864085, "grad_norm": 0.18988928198814392, "learning_rate": 1.9377863625074665e-06, "loss": 0.4687, "step": 10578 }, { "epoch": 2.677549987344976, "grad_norm": 0.18794718384742737, "learning_rate": 1.936370077981827e-06, "loss": 0.4532, "step": 10579 }, { "epoch": 2.677803087825867, "grad_norm": 0.20113343000411987, "learning_rate": 1.9349542557269883e-06, "loss": 0.4391, "step": 10580 }, { "epoch": 2.6780561883067575, "grad_norm": 0.1889290064573288, "learning_rate": 1.933538895824113e-06, "loss": 0.446, "step": 10581 }, { "epoch": 2.6783092887876485, "grad_norm": 0.21310104429721832, "learning_rate": 1.9321239983543415e-06, "loss": 0.4361, "step": 10582 }, { "epoch": 2.6785623892685395, "grad_norm": 0.18908824026584625, "learning_rate": 1.9307095633987883e-06, "loss": 0.453, "step": 10583 }, { "epoch": 2.6788154897494305, "grad_norm": 0.21605269610881805, "learning_rate": 1.929295591038538e-06, "loss": 0.4567, "step": 10584 }, { "epoch": 2.6790685902303215, "grad_norm": 0.182106614112854, "learning_rate": 1.9278820813546517e-06, "loss": 0.4486, "step": 10585 }, { "epoch": 2.6793216907112125, "grad_norm": 0.18528088927268982, "learning_rate": 1.9264690344281635e-06, "loss": 0.4499, "step": 10586 }, { "epoch": 2.6795747911921035, "grad_norm": 0.1927052140235901, "learning_rate": 1.9250564503400793e-06, "loss": 0.4291, "step": 10587 }, { "epoch": 2.679827891672994, "grad_norm": 0.18866869807243347, "learning_rate": 1.923644329171379e-06, "loss": 0.4509, "step": 10588 }, { "epoch": 2.680080992153885, "grad_norm": 0.18642966449260712, "learning_rate": 1.922232671003018e-06, "loss": 0.4685, "step": 10589 }, { "epoch": 2.680334092634776, "grad_norm": 0.18610675632953644, "learning_rate": 1.9208214759159225e-06, "loss": 0.4391, "step": 10590 }, { "epoch": 2.680587193115667, "grad_norm": 0.1835828572511673, "learning_rate": 1.919410743990995e-06, "loss": 0.4504, "step": 10591 }, { "epoch": 2.6808402935965576, "grad_norm": 0.18546776473522186, "learning_rate": 1.918000475309111e-06, "loss": 0.4582, "step": 10592 }, { "epoch": 2.6810933940774486, "grad_norm": 0.22675910592079163, "learning_rate": 1.916590669951113e-06, "loss": 0.4575, "step": 10593 }, { "epoch": 2.6813464945583396, "grad_norm": 0.18727615475654602, "learning_rate": 1.9151813279978226e-06, "loss": 0.4519, "step": 10594 }, { "epoch": 2.6815995950392306, "grad_norm": 0.1872669756412506, "learning_rate": 1.913772449530039e-06, "loss": 0.4568, "step": 10595 }, { "epoch": 2.6818526955201216, "grad_norm": 0.18624404072761536, "learning_rate": 1.9123640346285275e-06, "loss": 0.4616, "step": 10596 }, { "epoch": 2.6821057960010126, "grad_norm": 0.1899319887161255, "learning_rate": 1.9109560833740316e-06, "loss": 0.4423, "step": 10597 }, { "epoch": 2.682358896481903, "grad_norm": 0.18655002117156982, "learning_rate": 1.9095485958472658e-06, "loss": 0.4542, "step": 10598 }, { "epoch": 2.682611996962794, "grad_norm": 0.18103568255901337, "learning_rate": 1.9081415721289155e-06, "loss": 0.4515, "step": 10599 }, { "epoch": 2.682865097443685, "grad_norm": 0.1875777393579483, "learning_rate": 1.906735012299641e-06, "loss": 0.4285, "step": 10600 }, { "epoch": 2.683118197924576, "grad_norm": 0.21962186694145203, "learning_rate": 1.9053289164400834e-06, "loss": 0.457, "step": 10601 }, { "epoch": 2.6833712984054667, "grad_norm": 0.18635612726211548, "learning_rate": 1.9039232846308476e-06, "loss": 0.4524, "step": 10602 }, { "epoch": 2.6836243988863577, "grad_norm": 0.1879514753818512, "learning_rate": 1.9025181169525197e-06, "loss": 0.4546, "step": 10603 }, { "epoch": 2.6838774993672487, "grad_norm": 0.1827278733253479, "learning_rate": 1.901113413485648e-06, "loss": 0.4407, "step": 10604 }, { "epoch": 2.6841305998481397, "grad_norm": 0.18851816654205322, "learning_rate": 1.8997091743107654e-06, "loss": 0.442, "step": 10605 }, { "epoch": 2.6843837003290307, "grad_norm": 0.1855103075504303, "learning_rate": 1.8983053995083721e-06, "loss": 0.4421, "step": 10606 }, { "epoch": 2.6846368008099217, "grad_norm": 0.18534676730632782, "learning_rate": 1.8969020891589428e-06, "loss": 0.4632, "step": 10607 }, { "epoch": 2.6848899012908123, "grad_norm": 0.19519789516925812, "learning_rate": 1.8954992433429308e-06, "loss": 0.4443, "step": 10608 }, { "epoch": 2.6851430017717033, "grad_norm": 0.18268486857414246, "learning_rate": 1.8940968621407574e-06, "loss": 0.4506, "step": 10609 }, { "epoch": 2.6853961022525943, "grad_norm": 0.18298004567623138, "learning_rate": 1.8926949456328148e-06, "loss": 0.4363, "step": 10610 }, { "epoch": 2.6856492027334853, "grad_norm": 0.18722693622112274, "learning_rate": 1.8912934938994721e-06, "loss": 0.4489, "step": 10611 }, { "epoch": 2.685902303214376, "grad_norm": 0.18959781527519226, "learning_rate": 1.889892507021074e-06, "loss": 0.4612, "step": 10612 }, { "epoch": 2.686155403695267, "grad_norm": 0.18314673006534576, "learning_rate": 1.8884919850779315e-06, "loss": 0.4256, "step": 10613 }, { "epoch": 2.686408504176158, "grad_norm": 0.1838972270488739, "learning_rate": 1.8870919281503418e-06, "loss": 0.4683, "step": 10614 }, { "epoch": 2.686661604657049, "grad_norm": 0.18687307834625244, "learning_rate": 1.8856923363185608e-06, "loss": 0.4674, "step": 10615 }, { "epoch": 2.68691470513794, "grad_norm": 0.18821589648723602, "learning_rate": 1.8842932096628263e-06, "loss": 0.4714, "step": 10616 }, { "epoch": 2.687167805618831, "grad_norm": 0.18245410919189453, "learning_rate": 1.8828945482633454e-06, "loss": 0.4449, "step": 10617 }, { "epoch": 2.687420906099722, "grad_norm": 0.18477500975131989, "learning_rate": 1.8814963522003028e-06, "loss": 0.4449, "step": 10618 }, { "epoch": 2.6876740065806124, "grad_norm": 0.20042678713798523, "learning_rate": 1.880098621553852e-06, "loss": 0.4461, "step": 10619 }, { "epoch": 2.6879271070615034, "grad_norm": 0.18551845848560333, "learning_rate": 1.8787013564041234e-06, "loss": 0.4547, "step": 10620 }, { "epoch": 2.6881802075423944, "grad_norm": 0.18100355565547943, "learning_rate": 1.8773045568312186e-06, "loss": 0.4374, "step": 10621 }, { "epoch": 2.6884333080232854, "grad_norm": 0.18661175668239594, "learning_rate": 1.8759082229152137e-06, "loss": 0.4374, "step": 10622 }, { "epoch": 2.688686408504176, "grad_norm": 0.18718157708644867, "learning_rate": 1.8745123547361565e-06, "loss": 0.4524, "step": 10623 }, { "epoch": 2.688939508985067, "grad_norm": 0.18544641137123108, "learning_rate": 1.87311695237407e-06, "loss": 0.4724, "step": 10624 }, { "epoch": 2.689192609465958, "grad_norm": 0.18199455738067627, "learning_rate": 1.87172201590895e-06, "loss": 0.4592, "step": 10625 }, { "epoch": 2.689445709946849, "grad_norm": 0.18819381296634674, "learning_rate": 1.8703275454207649e-06, "loss": 0.4397, "step": 10626 }, { "epoch": 2.68969881042774, "grad_norm": 0.1863316148519516, "learning_rate": 1.868933540989456e-06, "loss": 0.4598, "step": 10627 }, { "epoch": 2.689951910908631, "grad_norm": 0.19088983535766602, "learning_rate": 1.8675400026949397e-06, "loss": 0.4184, "step": 10628 }, { "epoch": 2.6902050113895215, "grad_norm": 0.1879187971353531, "learning_rate": 1.8661469306171044e-06, "loss": 0.4239, "step": 10629 }, { "epoch": 2.6904581118704125, "grad_norm": 0.188790962100029, "learning_rate": 1.8647543248358113e-06, "loss": 0.4501, "step": 10630 }, { "epoch": 2.6907112123513035, "grad_norm": 0.19373838603496552, "learning_rate": 1.8633621854308958e-06, "loss": 0.4541, "step": 10631 }, { "epoch": 2.6909643128321945, "grad_norm": 0.18986822664737701, "learning_rate": 1.8619705124821664e-06, "loss": 0.4652, "step": 10632 }, { "epoch": 2.691217413313085, "grad_norm": 0.18453486263751984, "learning_rate": 1.8605793060694043e-06, "loss": 0.4289, "step": 10633 }, { "epoch": 2.691470513793976, "grad_norm": 0.1837567389011383, "learning_rate": 1.8591885662723664e-06, "loss": 0.4765, "step": 10634 }, { "epoch": 2.691723614274867, "grad_norm": 0.18665811419487, "learning_rate": 1.8577982931707782e-06, "loss": 0.4362, "step": 10635 }, { "epoch": 2.691976714755758, "grad_norm": 0.18474097549915314, "learning_rate": 1.8564084868443421e-06, "loss": 0.4349, "step": 10636 }, { "epoch": 2.692229815236649, "grad_norm": 0.18348711729049683, "learning_rate": 1.8550191473727363e-06, "loss": 0.4212, "step": 10637 }, { "epoch": 2.69248291571754, "grad_norm": 0.1932244598865509, "learning_rate": 1.8536302748355995e-06, "loss": 0.5002, "step": 10638 }, { "epoch": 2.6927360161984306, "grad_norm": 0.18745777010917664, "learning_rate": 1.852241869312561e-06, "loss": 0.4455, "step": 10639 }, { "epoch": 2.6929891166793216, "grad_norm": 0.18727034330368042, "learning_rate": 1.850853930883214e-06, "loss": 0.4574, "step": 10640 }, { "epoch": 2.6932422171602126, "grad_norm": 0.18647171556949615, "learning_rate": 1.849466459627124e-06, "loss": 0.4636, "step": 10641 }, { "epoch": 2.6934953176411036, "grad_norm": 0.19345934689044952, "learning_rate": 1.8480794556238345e-06, "loss": 0.4676, "step": 10642 }, { "epoch": 2.693748418121994, "grad_norm": 0.18870657682418823, "learning_rate": 1.846692918952856e-06, "loss": 0.4649, "step": 10643 }, { "epoch": 2.694001518602885, "grad_norm": 0.1901041716337204, "learning_rate": 1.8453068496936744e-06, "loss": 0.452, "step": 10644 }, { "epoch": 2.694254619083776, "grad_norm": 0.1872105747461319, "learning_rate": 1.8439212479257561e-06, "loss": 0.4338, "step": 10645 }, { "epoch": 2.694507719564667, "grad_norm": 0.19107943773269653, "learning_rate": 1.8425361137285313e-06, "loss": 0.4362, "step": 10646 }, { "epoch": 2.694760820045558, "grad_norm": 0.18633130192756653, "learning_rate": 1.8411514471814063e-06, "loss": 0.4376, "step": 10647 }, { "epoch": 2.695013920526449, "grad_norm": 0.19420388340950012, "learning_rate": 1.8397672483637652e-06, "loss": 0.4485, "step": 10648 }, { "epoch": 2.69526702100734, "grad_norm": 0.19140924513339996, "learning_rate": 1.8383835173549558e-06, "loss": 0.4673, "step": 10649 }, { "epoch": 2.6955201214882307, "grad_norm": 0.1998133808374405, "learning_rate": 1.8370002542343056e-06, "loss": 0.4348, "step": 10650 }, { "epoch": 2.6957732219691217, "grad_norm": 0.18991732597351074, "learning_rate": 1.8356174590811137e-06, "loss": 0.4748, "step": 10651 }, { "epoch": 2.6960263224500127, "grad_norm": 0.19085434079170227, "learning_rate": 1.8342351319746554e-06, "loss": 0.4557, "step": 10652 }, { "epoch": 2.6962794229309037, "grad_norm": 0.18868015706539154, "learning_rate": 1.8328532729941795e-06, "loss": 0.4538, "step": 10653 }, { "epoch": 2.6965325234117943, "grad_norm": 0.19274403154850006, "learning_rate": 1.831471882218897e-06, "loss": 0.4575, "step": 10654 }, { "epoch": 2.6967856238926853, "grad_norm": 0.18572992086410522, "learning_rate": 1.8300909597280047e-06, "loss": 0.4638, "step": 10655 }, { "epoch": 2.6970387243735763, "grad_norm": 0.18433254957199097, "learning_rate": 1.8287105056006682e-06, "loss": 0.4442, "step": 10656 }, { "epoch": 2.6972918248544673, "grad_norm": 0.18460451066493988, "learning_rate": 1.8273305199160207e-06, "loss": 0.425, "step": 10657 }, { "epoch": 2.6975449253353583, "grad_norm": 0.1831716001033783, "learning_rate": 1.8259510027531824e-06, "loss": 0.4267, "step": 10658 }, { "epoch": 2.6977980258162493, "grad_norm": 0.18824675679206848, "learning_rate": 1.8245719541912355e-06, "loss": 0.4357, "step": 10659 }, { "epoch": 2.69805112629714, "grad_norm": 0.1861822009086609, "learning_rate": 1.8231933743092344e-06, "loss": 0.4552, "step": 10660 }, { "epoch": 2.698304226778031, "grad_norm": 0.18677295744419098, "learning_rate": 1.8218152631862118e-06, "loss": 0.4514, "step": 10661 }, { "epoch": 2.698557327258922, "grad_norm": 0.19373933970928192, "learning_rate": 1.8204376209011732e-06, "loss": 0.4632, "step": 10662 }, { "epoch": 2.698810427739813, "grad_norm": 0.19235080480575562, "learning_rate": 1.8190604475330942e-06, "loss": 0.4547, "step": 10663 }, { "epoch": 2.6990635282207034, "grad_norm": 0.192033588886261, "learning_rate": 1.817683743160925e-06, "loss": 0.443, "step": 10664 }, { "epoch": 2.6993166287015944, "grad_norm": 0.18771247565746307, "learning_rate": 1.8163075078635917e-06, "loss": 0.4696, "step": 10665 }, { "epoch": 2.6995697291824854, "grad_norm": 0.19289863109588623, "learning_rate": 1.8149317417199886e-06, "loss": 0.4348, "step": 10666 }, { "epoch": 2.6998228296633764, "grad_norm": 0.2076113224029541, "learning_rate": 1.813556444808987e-06, "loss": 0.4554, "step": 10667 }, { "epoch": 2.7000759301442674, "grad_norm": 0.1901702731847763, "learning_rate": 1.8121816172094275e-06, "loss": 0.451, "step": 10668 }, { "epoch": 2.7003290306251584, "grad_norm": 0.18853646516799927, "learning_rate": 1.810807259000129e-06, "loss": 0.4522, "step": 10669 }, { "epoch": 2.700582131106049, "grad_norm": 0.1936553716659546, "learning_rate": 1.8094333702598787e-06, "loss": 0.4366, "step": 10670 }, { "epoch": 2.70083523158694, "grad_norm": 0.18912246823310852, "learning_rate": 1.808059951067439e-06, "loss": 0.4379, "step": 10671 }, { "epoch": 2.701088332067831, "grad_norm": 0.1944301724433899, "learning_rate": 1.8066870015015447e-06, "loss": 0.4606, "step": 10672 }, { "epoch": 2.701341432548722, "grad_norm": 0.19089525938034058, "learning_rate": 1.805314521640905e-06, "loss": 0.4412, "step": 10673 }, { "epoch": 2.7015945330296125, "grad_norm": 0.18463876843452454, "learning_rate": 1.8039425115642005e-06, "loss": 0.4287, "step": 10674 }, { "epoch": 2.7018476335105035, "grad_norm": 0.1889955699443817, "learning_rate": 1.8025709713500871e-06, "loss": 0.4561, "step": 10675 }, { "epoch": 2.7021007339913945, "grad_norm": 0.22348275780677795, "learning_rate": 1.8011999010771897e-06, "loss": 0.4612, "step": 10676 }, { "epoch": 2.7023538344722855, "grad_norm": 0.19147798418998718, "learning_rate": 1.799829300824112e-06, "loss": 0.4588, "step": 10677 }, { "epoch": 2.7026069349531765, "grad_norm": 0.1838223934173584, "learning_rate": 1.7984591706694243e-06, "loss": 0.4425, "step": 10678 }, { "epoch": 2.7028600354340675, "grad_norm": 0.1903192698955536, "learning_rate": 1.7970895106916752e-06, "loss": 0.4608, "step": 10679 }, { "epoch": 2.7031131359149585, "grad_norm": 0.18922577798366547, "learning_rate": 1.7957203209693852e-06, "loss": 0.4835, "step": 10680 }, { "epoch": 2.703366236395849, "grad_norm": 0.18112771213054657, "learning_rate": 1.794351601581047e-06, "loss": 0.4341, "step": 10681 }, { "epoch": 2.70361933687674, "grad_norm": 0.18750961124897003, "learning_rate": 1.792983352605121e-06, "loss": 0.4492, "step": 10682 }, { "epoch": 2.703872437357631, "grad_norm": 0.18891900777816772, "learning_rate": 1.7916155741200537e-06, "loss": 0.4532, "step": 10683 }, { "epoch": 2.704125537838522, "grad_norm": 0.18331678211688995, "learning_rate": 1.790248266204252e-06, "loss": 0.446, "step": 10684 }, { "epoch": 2.7043786383194126, "grad_norm": 0.1923510581254959, "learning_rate": 1.7888814289361034e-06, "loss": 0.4152, "step": 10685 }, { "epoch": 2.7046317388003036, "grad_norm": 0.18373550474643707, "learning_rate": 1.7875150623939663e-06, "loss": 0.4289, "step": 10686 }, { "epoch": 2.7048848392811946, "grad_norm": 0.19172894954681396, "learning_rate": 1.786149166656168e-06, "loss": 0.4435, "step": 10687 }, { "epoch": 2.7051379397620856, "grad_norm": 0.18452277779579163, "learning_rate": 1.7847837418010116e-06, "loss": 0.4735, "step": 10688 }, { "epoch": 2.7053910402429766, "grad_norm": 0.18039272725582123, "learning_rate": 1.78341878790678e-06, "loss": 0.429, "step": 10689 }, { "epoch": 2.7056441407238676, "grad_norm": 0.18320657312870026, "learning_rate": 1.7820543050517192e-06, "loss": 0.4411, "step": 10690 }, { "epoch": 2.705897241204758, "grad_norm": 0.18752886354923248, "learning_rate": 1.7806902933140536e-06, "loss": 0.4664, "step": 10691 }, { "epoch": 2.706150341685649, "grad_norm": 0.20481432974338531, "learning_rate": 1.7793267527719804e-06, "loss": 0.4546, "step": 10692 }, { "epoch": 2.70640344216654, "grad_norm": 0.19275566935539246, "learning_rate": 1.777963683503663e-06, "loss": 0.445, "step": 10693 }, { "epoch": 2.706656542647431, "grad_norm": 0.19067701697349548, "learning_rate": 1.7766010855872485e-06, "loss": 0.4637, "step": 10694 }, { "epoch": 2.7069096431283217, "grad_norm": 0.19021695852279663, "learning_rate": 1.7752389591008456e-06, "loss": 0.4344, "step": 10695 }, { "epoch": 2.7071627436092127, "grad_norm": 0.1882503479719162, "learning_rate": 1.77387730412255e-06, "loss": 0.4324, "step": 10696 }, { "epoch": 2.7074158440901037, "grad_norm": 0.19207219779491425, "learning_rate": 1.7725161207304187e-06, "loss": 0.4574, "step": 10697 }, { "epoch": 2.7076689445709947, "grad_norm": 0.18933556973934174, "learning_rate": 1.7711554090024886e-06, "loss": 0.4519, "step": 10698 }, { "epoch": 2.7079220450518857, "grad_norm": 0.209825336933136, "learning_rate": 1.7697951690167604e-06, "loss": 0.4462, "step": 10699 }, { "epoch": 2.7081751455327767, "grad_norm": 0.19911493360996246, "learning_rate": 1.7684354008512173e-06, "loss": 0.4305, "step": 10700 }, { "epoch": 2.7084282460136673, "grad_norm": 0.18270131945610046, "learning_rate": 1.7670761045838091e-06, "loss": 0.4412, "step": 10701 }, { "epoch": 2.7086813464945583, "grad_norm": 0.18895377218723297, "learning_rate": 1.765717280292466e-06, "loss": 0.4533, "step": 10702 }, { "epoch": 2.7089344469754493, "grad_norm": 0.19562414288520813, "learning_rate": 1.7643589280550867e-06, "loss": 0.4402, "step": 10703 }, { "epoch": 2.7091875474563403, "grad_norm": 0.18556015193462372, "learning_rate": 1.7630010479495385e-06, "loss": 0.4464, "step": 10704 }, { "epoch": 2.709440647937231, "grad_norm": 0.1892724633216858, "learning_rate": 1.7616436400536662e-06, "loss": 0.4493, "step": 10705 }, { "epoch": 2.709693748418122, "grad_norm": 0.188039630651474, "learning_rate": 1.7602867044452898e-06, "loss": 0.4479, "step": 10706 }, { "epoch": 2.709946848899013, "grad_norm": 0.1889871507883072, "learning_rate": 1.7589302412021981e-06, "loss": 0.4773, "step": 10707 }, { "epoch": 2.710199949379904, "grad_norm": 0.1877419352531433, "learning_rate": 1.7575742504021508e-06, "loss": 0.4305, "step": 10708 }, { "epoch": 2.710453049860795, "grad_norm": 0.19058197736740112, "learning_rate": 1.756218732122893e-06, "loss": 0.4491, "step": 10709 }, { "epoch": 2.710706150341686, "grad_norm": 0.19006898999214172, "learning_rate": 1.7548636864421265e-06, "loss": 0.4848, "step": 10710 }, { "epoch": 2.710959250822577, "grad_norm": 0.1908760666847229, "learning_rate": 1.7535091134375338e-06, "loss": 0.4459, "step": 10711 }, { "epoch": 2.7112123513034674, "grad_norm": 0.19349859654903412, "learning_rate": 1.7521550131867715e-06, "loss": 0.4676, "step": 10712 }, { "epoch": 2.7114654517843584, "grad_norm": 0.20672377943992615, "learning_rate": 1.7508013857674666e-06, "loss": 0.4521, "step": 10713 }, { "epoch": 2.7117185522652494, "grad_norm": 0.1813478022813797, "learning_rate": 1.7494482312572192e-06, "loss": 0.4289, "step": 10714 }, { "epoch": 2.7119716527461404, "grad_norm": 0.23392893373966217, "learning_rate": 1.748095549733604e-06, "loss": 0.46, "step": 10715 }, { "epoch": 2.712224753227031, "grad_norm": 0.18938565254211426, "learning_rate": 1.7467433412741663e-06, "loss": 0.4592, "step": 10716 }, { "epoch": 2.712477853707922, "grad_norm": 0.18900200724601746, "learning_rate": 1.7453916059564247e-06, "loss": 0.4536, "step": 10717 }, { "epoch": 2.712730954188813, "grad_norm": 0.19520339369773865, "learning_rate": 1.7440403438578736e-06, "loss": 0.4862, "step": 10718 }, { "epoch": 2.712984054669704, "grad_norm": 0.1828160583972931, "learning_rate": 1.7426895550559764e-06, "loss": 0.4461, "step": 10719 }, { "epoch": 2.713237155150595, "grad_norm": 0.20213733613491058, "learning_rate": 1.7413392396281714e-06, "loss": 0.4596, "step": 10720 }, { "epoch": 2.713490255631486, "grad_norm": 0.1876918375492096, "learning_rate": 1.739989397651869e-06, "loss": 0.4502, "step": 10721 }, { "epoch": 2.7137433561123765, "grad_norm": 0.18482975661754608, "learning_rate": 1.7386400292044526e-06, "loss": 0.4423, "step": 10722 }, { "epoch": 2.7139964565932675, "grad_norm": 0.21539390087127686, "learning_rate": 1.7372911343632792e-06, "loss": 0.446, "step": 10723 }, { "epoch": 2.7142495570741585, "grad_norm": 0.18920376896858215, "learning_rate": 1.7359427132056782e-06, "loss": 0.4598, "step": 10724 }, { "epoch": 2.7145026575550495, "grad_norm": 0.19459891319274902, "learning_rate": 1.7345947658089536e-06, "loss": 0.4586, "step": 10725 }, { "epoch": 2.71475575803594, "grad_norm": 0.19324526190757751, "learning_rate": 1.733247292250373e-06, "loss": 0.4441, "step": 10726 }, { "epoch": 2.715008858516831, "grad_norm": 0.18513906002044678, "learning_rate": 1.7319002926071914e-06, "loss": 0.4647, "step": 10727 }, { "epoch": 2.715261958997722, "grad_norm": 0.19062812626361847, "learning_rate": 1.7305537669566286e-06, "loss": 0.4335, "step": 10728 }, { "epoch": 2.715515059478613, "grad_norm": 0.18702419102191925, "learning_rate": 1.7292077153758758e-06, "loss": 0.4378, "step": 10729 }, { "epoch": 2.715768159959504, "grad_norm": 0.20221152901649475, "learning_rate": 1.7278621379421e-06, "loss": 0.4406, "step": 10730 }, { "epoch": 2.716021260440395, "grad_norm": 0.19076867401599884, "learning_rate": 1.7265170347324445e-06, "loss": 0.4564, "step": 10731 }, { "epoch": 2.7162743609212856, "grad_norm": 0.18771953880786896, "learning_rate": 1.7251724058240116e-06, "loss": 0.4433, "step": 10732 }, { "epoch": 2.7165274614021766, "grad_norm": 0.20086456835269928, "learning_rate": 1.7238282512938942e-06, "loss": 0.475, "step": 10733 }, { "epoch": 2.7167805618830676, "grad_norm": 0.20164425671100616, "learning_rate": 1.7224845712191474e-06, "loss": 0.4601, "step": 10734 }, { "epoch": 2.7170336623639586, "grad_norm": 0.1912437528371811, "learning_rate": 1.7211413656768017e-06, "loss": 0.4749, "step": 10735 }, { "epoch": 2.717286762844849, "grad_norm": 0.19151702523231506, "learning_rate": 1.7197986347438622e-06, "loss": 0.4392, "step": 10736 }, { "epoch": 2.71753986332574, "grad_norm": 0.18975679576396942, "learning_rate": 1.7184563784972997e-06, "loss": 0.438, "step": 10737 }, { "epoch": 2.717792963806631, "grad_norm": 0.1919727474451065, "learning_rate": 1.7171145970140668e-06, "loss": 0.4524, "step": 10738 }, { "epoch": 2.718046064287522, "grad_norm": 0.18876530230045319, "learning_rate": 1.7157732903710812e-06, "loss": 0.4505, "step": 10739 }, { "epoch": 2.718299164768413, "grad_norm": 0.18480364978313446, "learning_rate": 1.7144324586452433e-06, "loss": 0.4568, "step": 10740 }, { "epoch": 2.718552265249304, "grad_norm": 0.20406852662563324, "learning_rate": 1.713092101913415e-06, "loss": 0.4274, "step": 10741 }, { "epoch": 2.718805365730195, "grad_norm": 0.18551349639892578, "learning_rate": 1.7117522202524416e-06, "loss": 0.4636, "step": 10742 }, { "epoch": 2.7190584662110857, "grad_norm": 0.2021525502204895, "learning_rate": 1.7104128137391296e-06, "loss": 0.4371, "step": 10743 }, { "epoch": 2.7193115666919767, "grad_norm": 0.185463547706604, "learning_rate": 1.7090738824502672e-06, "loss": 0.4537, "step": 10744 }, { "epoch": 2.7195646671728677, "grad_norm": 0.19260159134864807, "learning_rate": 1.7077354264626112e-06, "loss": 0.4359, "step": 10745 }, { "epoch": 2.7198177676537583, "grad_norm": 0.18522857129573822, "learning_rate": 1.7063974458528908e-06, "loss": 0.4508, "step": 10746 }, { "epoch": 2.7200708681346493, "grad_norm": 0.1878800094127655, "learning_rate": 1.7050599406978174e-06, "loss": 0.4601, "step": 10747 }, { "epoch": 2.7203239686155403, "grad_norm": 0.18913814425468445, "learning_rate": 1.7037229110740594e-06, "loss": 0.4658, "step": 10748 }, { "epoch": 2.7205770690964313, "grad_norm": 0.19317778944969177, "learning_rate": 1.7023863570582688e-06, "loss": 0.4446, "step": 10749 }, { "epoch": 2.7208301695773223, "grad_norm": 0.20469142496585846, "learning_rate": 1.7010502787270666e-06, "loss": 0.4535, "step": 10750 }, { "epoch": 2.7210832700582133, "grad_norm": 0.19406536221504211, "learning_rate": 1.6997146761570483e-06, "loss": 0.4568, "step": 10751 }, { "epoch": 2.7213363705391043, "grad_norm": 0.1859545260667801, "learning_rate": 1.6983795494247779e-06, "loss": 0.4417, "step": 10752 }, { "epoch": 2.721589471019995, "grad_norm": 0.19163824617862701, "learning_rate": 1.6970448986068032e-06, "loss": 0.4519, "step": 10753 }, { "epoch": 2.721842571500886, "grad_norm": 0.1880943775177002, "learning_rate": 1.6957107237796288e-06, "loss": 0.4632, "step": 10754 }, { "epoch": 2.722095671981777, "grad_norm": 0.18506546318531036, "learning_rate": 1.6943770250197444e-06, "loss": 0.4374, "step": 10755 }, { "epoch": 2.722348772462668, "grad_norm": 0.19333291053771973, "learning_rate": 1.6930438024036055e-06, "loss": 0.4547, "step": 10756 }, { "epoch": 2.7226018729435584, "grad_norm": 0.18323823809623718, "learning_rate": 1.6917110560076454e-06, "loss": 0.425, "step": 10757 }, { "epoch": 2.7228549734244494, "grad_norm": 0.18441985547542572, "learning_rate": 1.6903787859082666e-06, "loss": 0.4273, "step": 10758 }, { "epoch": 2.7231080739053404, "grad_norm": 0.1887933760881424, "learning_rate": 1.6890469921818443e-06, "loss": 0.4567, "step": 10759 }, { "epoch": 2.7233611743862314, "grad_norm": 0.18905070424079895, "learning_rate": 1.6877156749047297e-06, "loss": 0.4709, "step": 10760 }, { "epoch": 2.7236142748671224, "grad_norm": 0.1835470348596573, "learning_rate": 1.686384834153242e-06, "loss": 0.4493, "step": 10761 }, { "epoch": 2.7238673753480134, "grad_norm": 0.18342779576778412, "learning_rate": 1.6850544700036776e-06, "loss": 0.4641, "step": 10762 }, { "epoch": 2.724120475828904, "grad_norm": 0.19074350595474243, "learning_rate": 1.6837245825323012e-06, "loss": 0.4548, "step": 10763 }, { "epoch": 2.724373576309795, "grad_norm": 0.1975051313638687, "learning_rate": 1.6823951718153543e-06, "loss": 0.4634, "step": 10764 }, { "epoch": 2.724626676790686, "grad_norm": 0.19211143255233765, "learning_rate": 1.6810662379290487e-06, "loss": 0.4585, "step": 10765 }, { "epoch": 2.724879777271577, "grad_norm": 0.1861732006072998, "learning_rate": 1.6797377809495684e-06, "loss": 0.4534, "step": 10766 }, { "epoch": 2.7251328777524675, "grad_norm": 0.1845892071723938, "learning_rate": 1.6784098009530724e-06, "loss": 0.4534, "step": 10767 }, { "epoch": 2.7253859782333585, "grad_norm": 0.18696758151054382, "learning_rate": 1.6770822980156908e-06, "loss": 0.4715, "step": 10768 }, { "epoch": 2.7256390787142495, "grad_norm": 0.19098636507987976, "learning_rate": 1.675755272213525e-06, "loss": 0.4668, "step": 10769 }, { "epoch": 2.7258921791951405, "grad_norm": 0.18583600223064423, "learning_rate": 1.674428723622653e-06, "loss": 0.446, "step": 10770 }, { "epoch": 2.7261452796760315, "grad_norm": 0.1841953694820404, "learning_rate": 1.6731026523191207e-06, "loss": 0.4392, "step": 10771 }, { "epoch": 2.7263983801569225, "grad_norm": 0.1813010722398758, "learning_rate": 1.6717770583789505e-06, "loss": 0.4425, "step": 10772 }, { "epoch": 2.7266514806378135, "grad_norm": 0.7078092098236084, "learning_rate": 1.6704519418781351e-06, "loss": 0.4406, "step": 10773 }, { "epoch": 2.726904581118704, "grad_norm": 0.18610543012619019, "learning_rate": 1.6691273028926414e-06, "loss": 0.4565, "step": 10774 }, { "epoch": 2.727157681599595, "grad_norm": 0.18639329075813293, "learning_rate": 1.6678031414984096e-06, "loss": 0.4271, "step": 10775 }, { "epoch": 2.727410782080486, "grad_norm": 0.18806353211402893, "learning_rate": 1.6664794577713471e-06, "loss": 0.4558, "step": 10776 }, { "epoch": 2.7276638825613766, "grad_norm": 0.18364036083221436, "learning_rate": 1.6651562517873366e-06, "loss": 0.433, "step": 10777 }, { "epoch": 2.7279169830422676, "grad_norm": 0.18933017551898956, "learning_rate": 1.6638335236222414e-06, "loss": 0.4393, "step": 10778 }, { "epoch": 2.7281700835231586, "grad_norm": 0.1897771656513214, "learning_rate": 1.6625112733518866e-06, "loss": 0.426, "step": 10779 }, { "epoch": 2.7284231840040496, "grad_norm": 0.18335793912410736, "learning_rate": 1.661189501052074e-06, "loss": 0.4702, "step": 10780 }, { "epoch": 2.7286762844849406, "grad_norm": 0.1854332685470581, "learning_rate": 1.6598682067985817e-06, "loss": 0.4396, "step": 10781 }, { "epoch": 2.7289293849658316, "grad_norm": 0.18751871585845947, "learning_rate": 1.6585473906671524e-06, "loss": 0.4326, "step": 10782 }, { "epoch": 2.7291824854467226, "grad_norm": 0.18857653439044952, "learning_rate": 1.6572270527335033e-06, "loss": 0.4731, "step": 10783 }, { "epoch": 2.729435585927613, "grad_norm": 0.18325859308242798, "learning_rate": 1.6559071930733328e-06, "loss": 0.4673, "step": 10784 }, { "epoch": 2.729688686408504, "grad_norm": 0.18695184588432312, "learning_rate": 1.6545878117623027e-06, "loss": 0.4456, "step": 10785 }, { "epoch": 2.729941786889395, "grad_norm": 0.18161164224147797, "learning_rate": 1.6532689088760533e-06, "loss": 0.4229, "step": 10786 }, { "epoch": 2.730194887370286, "grad_norm": 0.20500795543193817, "learning_rate": 1.6519504844901902e-06, "loss": 0.471, "step": 10787 }, { "epoch": 2.7304479878511767, "grad_norm": 0.190902441740036, "learning_rate": 1.6506325386802968e-06, "loss": 0.4252, "step": 10788 }, { "epoch": 2.7307010883320677, "grad_norm": 0.18845456838607788, "learning_rate": 1.6493150715219285e-06, "loss": 0.4593, "step": 10789 }, { "epoch": 2.7309541888129587, "grad_norm": 0.19193415343761444, "learning_rate": 1.6479980830906116e-06, "loss": 0.4499, "step": 10790 }, { "epoch": 2.7312072892938497, "grad_norm": 0.18570420145988464, "learning_rate": 1.6466815734618513e-06, "loss": 0.3992, "step": 10791 }, { "epoch": 2.7314603897747407, "grad_norm": 0.1875656098127365, "learning_rate": 1.6453655427111181e-06, "loss": 0.4263, "step": 10792 }, { "epoch": 2.7317134902556317, "grad_norm": 0.1882130205631256, "learning_rate": 1.644049990913854e-06, "loss": 0.4369, "step": 10793 }, { "epoch": 2.7319665907365223, "grad_norm": 0.18661092221736908, "learning_rate": 1.6427349181454787e-06, "loss": 0.468, "step": 10794 }, { "epoch": 2.7322196912174133, "grad_norm": 0.1851874589920044, "learning_rate": 1.6414203244813831e-06, "loss": 0.4666, "step": 10795 }, { "epoch": 2.7324727916983043, "grad_norm": 0.18214388191699982, "learning_rate": 1.6401062099969279e-06, "loss": 0.4506, "step": 10796 }, { "epoch": 2.7327258921791953, "grad_norm": 0.1864897608757019, "learning_rate": 1.6387925747674538e-06, "loss": 0.4479, "step": 10797 }, { "epoch": 2.732978992660086, "grad_norm": 0.19093668460845947, "learning_rate": 1.6374794188682641e-06, "loss": 0.4368, "step": 10798 }, { "epoch": 2.733232093140977, "grad_norm": 0.19257505238056183, "learning_rate": 1.6361667423746397e-06, "loss": 0.4443, "step": 10799 }, { "epoch": 2.733485193621868, "grad_norm": 0.18559688329696655, "learning_rate": 1.6348545453618336e-06, "loss": 0.4613, "step": 10800 }, { "epoch": 2.733738294102759, "grad_norm": 0.18721972405910492, "learning_rate": 1.6335428279050735e-06, "loss": 0.4565, "step": 10801 }, { "epoch": 2.73399139458365, "grad_norm": 0.18972718715667725, "learning_rate": 1.6322315900795549e-06, "loss": 0.4549, "step": 10802 }, { "epoch": 2.734244495064541, "grad_norm": 0.18121817708015442, "learning_rate": 1.6309208319604485e-06, "loss": 0.4623, "step": 10803 }, { "epoch": 2.7344975955454314, "grad_norm": 0.20752465724945068, "learning_rate": 1.629610553622899e-06, "loss": 0.4388, "step": 10804 }, { "epoch": 2.7347506960263224, "grad_norm": 0.1899009346961975, "learning_rate": 1.6283007551420206e-06, "loss": 0.4638, "step": 10805 }, { "epoch": 2.7350037965072134, "grad_norm": 0.1887660026550293, "learning_rate": 1.6269914365929007e-06, "loss": 0.441, "step": 10806 }, { "epoch": 2.7352568969881044, "grad_norm": 0.1970963031053543, "learning_rate": 1.6256825980506019e-06, "loss": 0.4519, "step": 10807 }, { "epoch": 2.735509997468995, "grad_norm": 0.18963074684143066, "learning_rate": 1.624374239590155e-06, "loss": 0.4377, "step": 10808 }, { "epoch": 2.735763097949886, "grad_norm": 0.19024135172367096, "learning_rate": 1.623066361286566e-06, "loss": 0.4647, "step": 10809 }, { "epoch": 2.736016198430777, "grad_norm": 0.6665005683898926, "learning_rate": 1.6217589632148135e-06, "loss": 0.5113, "step": 10810 }, { "epoch": 2.736269298911668, "grad_norm": 0.18749161064624786, "learning_rate": 1.6204520454498473e-06, "loss": 0.4682, "step": 10811 }, { "epoch": 2.736522399392559, "grad_norm": 0.19256168603897095, "learning_rate": 1.61914560806659e-06, "loss": 0.442, "step": 10812 }, { "epoch": 2.73677549987345, "grad_norm": 0.19638700783252716, "learning_rate": 1.6178396511399374e-06, "loss": 0.4516, "step": 10813 }, { "epoch": 2.737028600354341, "grad_norm": 0.19271452724933624, "learning_rate": 1.616534174744757e-06, "loss": 0.4685, "step": 10814 }, { "epoch": 2.7372817008352315, "grad_norm": 0.18640977144241333, "learning_rate": 1.6152291789558883e-06, "loss": 0.4383, "step": 10815 }, { "epoch": 2.7375348013161225, "grad_norm": 0.18143045902252197, "learning_rate": 1.6139246638481454e-06, "loss": 0.4356, "step": 10816 }, { "epoch": 2.7377879017970135, "grad_norm": 0.18178366124629974, "learning_rate": 1.612620629496312e-06, "loss": 0.4757, "step": 10817 }, { "epoch": 2.7380410022779045, "grad_norm": 0.18867850303649902, "learning_rate": 1.6113170759751472e-06, "loss": 0.4426, "step": 10818 }, { "epoch": 2.738294102758795, "grad_norm": 0.19954080879688263, "learning_rate": 1.6100140033593814e-06, "loss": 0.4642, "step": 10819 }, { "epoch": 2.738547203239686, "grad_norm": 0.1842670440673828, "learning_rate": 1.6087114117237146e-06, "loss": 0.4403, "step": 10820 }, { "epoch": 2.738800303720577, "grad_norm": 0.18283335864543915, "learning_rate": 1.6074093011428193e-06, "loss": 0.4598, "step": 10821 }, { "epoch": 2.739053404201468, "grad_norm": 0.18516530096530914, "learning_rate": 1.6061076716913482e-06, "loss": 0.4397, "step": 10822 }, { "epoch": 2.739306504682359, "grad_norm": 0.2137739211320877, "learning_rate": 1.604806523443919e-06, "loss": 0.4616, "step": 10823 }, { "epoch": 2.73955960516325, "grad_norm": 0.18811093270778656, "learning_rate": 1.6035058564751226e-06, "loss": 0.4353, "step": 10824 }, { "epoch": 2.7398127056441406, "grad_norm": 0.1938185840845108, "learning_rate": 1.6022056708595279e-06, "loss": 0.4522, "step": 10825 }, { "epoch": 2.7400658061250316, "grad_norm": 0.1905793994665146, "learning_rate": 1.6009059666716665e-06, "loss": 0.4618, "step": 10826 }, { "epoch": 2.7403189066059226, "grad_norm": 0.19205132126808167, "learning_rate": 1.5996067439860464e-06, "loss": 0.4559, "step": 10827 }, { "epoch": 2.7405720070868136, "grad_norm": 0.19092683494091034, "learning_rate": 1.5983080028771547e-06, "loss": 0.4304, "step": 10828 }, { "epoch": 2.740825107567704, "grad_norm": 0.18595877289772034, "learning_rate": 1.597009743419443e-06, "loss": 0.4458, "step": 10829 }, { "epoch": 2.741078208048595, "grad_norm": 0.1941840499639511, "learning_rate": 1.5957119656873388e-06, "loss": 0.4763, "step": 10830 }, { "epoch": 2.741331308529486, "grad_norm": 0.19735024869441986, "learning_rate": 1.594414669755241e-06, "loss": 0.4383, "step": 10831 }, { "epoch": 2.741584409010377, "grad_norm": 0.1921677589416504, "learning_rate": 1.593117855697519e-06, "loss": 0.4501, "step": 10832 }, { "epoch": 2.741837509491268, "grad_norm": 0.1911821812391281, "learning_rate": 1.591821523588517e-06, "loss": 0.4738, "step": 10833 }, { "epoch": 2.742090609972159, "grad_norm": 0.1879289299249649, "learning_rate": 1.5905256735025475e-06, "loss": 0.4364, "step": 10834 }, { "epoch": 2.7423437104530497, "grad_norm": 0.18365800380706787, "learning_rate": 1.5892303055139068e-06, "loss": 0.4205, "step": 10835 }, { "epoch": 2.7425968109339407, "grad_norm": 0.18991513550281525, "learning_rate": 1.5879354196968533e-06, "loss": 0.4368, "step": 10836 }, { "epoch": 2.7428499114148317, "grad_norm": 0.19046629965305328, "learning_rate": 1.586641016125615e-06, "loss": 0.4811, "step": 10837 }, { "epoch": 2.7431030118957227, "grad_norm": 0.19811348617076874, "learning_rate": 1.585347094874401e-06, "loss": 0.4489, "step": 10838 }, { "epoch": 2.7433561123766133, "grad_norm": 0.1862098127603531, "learning_rate": 1.5840536560173881e-06, "loss": 0.471, "step": 10839 }, { "epoch": 2.7436092128575043, "grad_norm": 0.20495723187923431, "learning_rate": 1.5827606996287248e-06, "loss": 0.4456, "step": 10840 }, { "epoch": 2.7438623133383953, "grad_norm": 0.19051222503185272, "learning_rate": 1.5814682257825386e-06, "loss": 0.4509, "step": 10841 }, { "epoch": 2.7441154138192863, "grad_norm": 0.19056947529315948, "learning_rate": 1.5801762345529227e-06, "loss": 0.4639, "step": 10842 }, { "epoch": 2.7443685143001773, "grad_norm": 0.18716110289096832, "learning_rate": 1.5788847260139406e-06, "loss": 0.4553, "step": 10843 }, { "epoch": 2.7446216147810683, "grad_norm": 0.18310093879699707, "learning_rate": 1.5775937002396336e-06, "loss": 0.432, "step": 10844 }, { "epoch": 2.7448747152619593, "grad_norm": 0.19664499163627625, "learning_rate": 1.5763031573040145e-06, "loss": 0.4368, "step": 10845 }, { "epoch": 2.74512781574285, "grad_norm": 0.18563328683376312, "learning_rate": 1.5750130972810662e-06, "loss": 0.4372, "step": 10846 }, { "epoch": 2.745380916223741, "grad_norm": 0.18935388326644897, "learning_rate": 1.5737235202447464e-06, "loss": 0.4414, "step": 10847 }, { "epoch": 2.745634016704632, "grad_norm": 0.18697485327720642, "learning_rate": 1.5724344262689816e-06, "loss": 0.4539, "step": 10848 }, { "epoch": 2.745887117185523, "grad_norm": 0.18489663302898407, "learning_rate": 1.571145815427676e-06, "loss": 0.458, "step": 10849 }, { "epoch": 2.7461402176664134, "grad_norm": 0.19040612876415253, "learning_rate": 1.5698576877946992e-06, "loss": 0.4623, "step": 10850 }, { "epoch": 2.7463933181473044, "grad_norm": 0.19297322630882263, "learning_rate": 1.5685700434438999e-06, "loss": 0.4681, "step": 10851 }, { "epoch": 2.7466464186281954, "grad_norm": 0.1871175915002823, "learning_rate": 1.567282882449095e-06, "loss": 0.4309, "step": 10852 }, { "epoch": 2.7468995191090864, "grad_norm": 0.18715374171733856, "learning_rate": 1.5659962048840749e-06, "loss": 0.4479, "step": 10853 }, { "epoch": 2.7471526195899774, "grad_norm": 0.18993161618709564, "learning_rate": 1.564710010822601e-06, "loss": 0.4598, "step": 10854 }, { "epoch": 2.7474057200708684, "grad_norm": 0.1895006000995636, "learning_rate": 1.5634243003384087e-06, "loss": 0.4475, "step": 10855 }, { "epoch": 2.747658820551759, "grad_norm": 0.18528281152248383, "learning_rate": 1.5621390735052056e-06, "loss": 0.463, "step": 10856 }, { "epoch": 2.74791192103265, "grad_norm": 0.18444998562335968, "learning_rate": 1.5608543303966705e-06, "loss": 0.456, "step": 10857 }, { "epoch": 2.748165021513541, "grad_norm": 0.1846940517425537, "learning_rate": 1.5595700710864558e-06, "loss": 0.4523, "step": 10858 }, { "epoch": 2.748418121994432, "grad_norm": 0.18477055430412292, "learning_rate": 1.5582862956481849e-06, "loss": 0.447, "step": 10859 }, { "epoch": 2.7486712224753225, "grad_norm": 0.19135423004627228, "learning_rate": 1.557003004155453e-06, "loss": 0.4666, "step": 10860 }, { "epoch": 2.7489243229562135, "grad_norm": 0.1897340714931488, "learning_rate": 1.555720196681829e-06, "loss": 0.4632, "step": 10861 }, { "epoch": 2.7491774234371045, "grad_norm": 0.18290121853351593, "learning_rate": 1.5544378733008536e-06, "loss": 0.4255, "step": 10862 }, { "epoch": 2.7494305239179955, "grad_norm": 0.18928471207618713, "learning_rate": 1.5531560340860407e-06, "loss": 0.4293, "step": 10863 }, { "epoch": 2.7496836243988865, "grad_norm": 0.19538670778274536, "learning_rate": 1.5518746791108763e-06, "loss": 0.4645, "step": 10864 }, { "epoch": 2.7499367248797775, "grad_norm": 0.18508665263652802, "learning_rate": 1.5505938084488114e-06, "loss": 0.4437, "step": 10865 }, { "epoch": 2.750189825360668, "grad_norm": 0.18484947085380554, "learning_rate": 1.5493134221732832e-06, "loss": 0.4385, "step": 10866 }, { "epoch": 2.750442925841559, "grad_norm": 0.18503354489803314, "learning_rate": 1.5480335203576902e-06, "loss": 0.431, "step": 10867 }, { "epoch": 2.75069602632245, "grad_norm": 0.20019982755184174, "learning_rate": 1.5467541030754063e-06, "loss": 0.4515, "step": 10868 }, { "epoch": 2.750949126803341, "grad_norm": 0.18714971840381622, "learning_rate": 1.5454751703997818e-06, "loss": 0.4597, "step": 10869 }, { "epoch": 2.7512022272842316, "grad_norm": 0.19452440738677979, "learning_rate": 1.5441967224041276e-06, "loss": 0.4394, "step": 10870 }, { "epoch": 2.7514553277651226, "grad_norm": 0.1878664195537567, "learning_rate": 1.5429187591617378e-06, "loss": 0.4663, "step": 10871 }, { "epoch": 2.7517084282460136, "grad_norm": 0.29313936829566956, "learning_rate": 1.541641280745877e-06, "loss": 0.4447, "step": 10872 }, { "epoch": 2.7519615287269046, "grad_norm": 0.18324850499629974, "learning_rate": 1.5403642872297808e-06, "loss": 0.4666, "step": 10873 }, { "epoch": 2.7522146292077956, "grad_norm": 0.19437596201896667, "learning_rate": 1.5390877786866542e-06, "loss": 0.4575, "step": 10874 }, { "epoch": 2.7524677296886866, "grad_norm": 0.18439547717571259, "learning_rate": 1.5378117551896798e-06, "loss": 0.465, "step": 10875 }, { "epoch": 2.7527208301695776, "grad_norm": 0.1926000565290451, "learning_rate": 1.5365362168120046e-06, "loss": 0.4526, "step": 10876 }, { "epoch": 2.752973930650468, "grad_norm": 0.1890360563993454, "learning_rate": 1.5352611636267557e-06, "loss": 0.4358, "step": 10877 }, { "epoch": 2.753227031131359, "grad_norm": 0.18792372941970825, "learning_rate": 1.5339865957070254e-06, "loss": 0.4479, "step": 10878 }, { "epoch": 2.75348013161225, "grad_norm": 0.18951117992401123, "learning_rate": 1.532712513125888e-06, "loss": 0.4814, "step": 10879 }, { "epoch": 2.753733232093141, "grad_norm": 0.18602293729782104, "learning_rate": 1.5314389159563835e-06, "loss": 0.4494, "step": 10880 }, { "epoch": 2.7539863325740317, "grad_norm": 0.24481068551540375, "learning_rate": 1.5301658042715196e-06, "loss": 0.433, "step": 10881 }, { "epoch": 2.7542394330549227, "grad_norm": 0.1851472407579422, "learning_rate": 1.528893178144284e-06, "loss": 0.4618, "step": 10882 }, { "epoch": 2.7544925335358137, "grad_norm": 0.18980452418327332, "learning_rate": 1.5276210376476318e-06, "loss": 0.4572, "step": 10883 }, { "epoch": 2.7547456340167047, "grad_norm": 0.18807728588581085, "learning_rate": 1.526349382854495e-06, "loss": 0.4268, "step": 10884 }, { "epoch": 2.7549987344975957, "grad_norm": 0.18911075592041016, "learning_rate": 1.5250782138377707e-06, "loss": 0.4575, "step": 10885 }, { "epoch": 2.7552518349784867, "grad_norm": 0.1856330931186676, "learning_rate": 1.5238075306703393e-06, "loss": 0.4558, "step": 10886 }, { "epoch": 2.7555049354593772, "grad_norm": 0.20188260078430176, "learning_rate": 1.5225373334250393e-06, "loss": 0.4517, "step": 10887 }, { "epoch": 2.7557580359402682, "grad_norm": 0.18790841102600098, "learning_rate": 1.5212676221746925e-06, "loss": 0.4493, "step": 10888 }, { "epoch": 2.7560111364211592, "grad_norm": 0.1927851140499115, "learning_rate": 1.5199983969920862e-06, "loss": 0.4476, "step": 10889 }, { "epoch": 2.7562642369020502, "grad_norm": 0.20649097859859467, "learning_rate": 1.5187296579499844e-06, "loss": 0.4613, "step": 10890 }, { "epoch": 2.756517337382941, "grad_norm": 0.18235309422016144, "learning_rate": 1.5174614051211189e-06, "loss": 0.4446, "step": 10891 }, { "epoch": 2.756770437863832, "grad_norm": 0.18710000813007355, "learning_rate": 1.5161936385782006e-06, "loss": 0.4463, "step": 10892 }, { "epoch": 2.757023538344723, "grad_norm": 0.18638913333415985, "learning_rate": 1.5149263583939034e-06, "loss": 0.4388, "step": 10893 }, { "epoch": 2.757276638825614, "grad_norm": 0.19009929895401, "learning_rate": 1.5136595646408792e-06, "loss": 0.4546, "step": 10894 }, { "epoch": 2.757529739306505, "grad_norm": 0.19265538454055786, "learning_rate": 1.5123932573917499e-06, "loss": 0.4738, "step": 10895 }, { "epoch": 2.757782839787396, "grad_norm": 0.20466256141662598, "learning_rate": 1.511127436719112e-06, "loss": 0.4256, "step": 10896 }, { "epoch": 2.7580359402682864, "grad_norm": 0.18869969248771667, "learning_rate": 1.5098621026955296e-06, "loss": 0.4278, "step": 10897 }, { "epoch": 2.7582890407491774, "grad_norm": 0.18428292870521545, "learning_rate": 1.5085972553935446e-06, "loss": 0.4639, "step": 10898 }, { "epoch": 2.7585421412300684, "grad_norm": 0.18521948158740997, "learning_rate": 1.5073328948856658e-06, "loss": 0.4284, "step": 10899 }, { "epoch": 2.7587952417109594, "grad_norm": 0.18867680430412292, "learning_rate": 1.5060690212443773e-06, "loss": 0.4425, "step": 10900 }, { "epoch": 2.75904834219185, "grad_norm": 0.19216831028461456, "learning_rate": 1.5048056345421346e-06, "loss": 0.461, "step": 10901 }, { "epoch": 2.759301442672741, "grad_norm": 0.19356101751327515, "learning_rate": 1.5035427348513643e-06, "loss": 0.4577, "step": 10902 }, { "epoch": 2.759554543153632, "grad_norm": 0.18654516339302063, "learning_rate": 1.502280322244466e-06, "loss": 0.444, "step": 10903 }, { "epoch": 2.759807643634523, "grad_norm": 0.1862822026014328, "learning_rate": 1.5010183967938108e-06, "loss": 0.4514, "step": 10904 }, { "epoch": 2.760060744115414, "grad_norm": 0.19031672179698944, "learning_rate": 1.499756958571742e-06, "loss": 0.4396, "step": 10905 }, { "epoch": 2.760313844596305, "grad_norm": 0.18869811296463013, "learning_rate": 1.498496007650576e-06, "loss": 0.4544, "step": 10906 }, { "epoch": 2.760566945077196, "grad_norm": 0.1900968700647354, "learning_rate": 1.497235544102601e-06, "loss": 0.4563, "step": 10907 }, { "epoch": 2.7608200455580865, "grad_norm": 0.20590589940547943, "learning_rate": 1.4959755680000776e-06, "loss": 0.4511, "step": 10908 }, { "epoch": 2.7610731460389775, "grad_norm": 0.1917276382446289, "learning_rate": 1.4947160794152315e-06, "loss": 0.4608, "step": 10909 }, { "epoch": 2.7613262465198685, "grad_norm": 0.19667592644691467, "learning_rate": 1.4934570784202718e-06, "loss": 0.4571, "step": 10910 }, { "epoch": 2.7615793470007595, "grad_norm": 0.19057251513004303, "learning_rate": 1.492198565087375e-06, "loss": 0.4699, "step": 10911 }, { "epoch": 2.76183244748165, "grad_norm": 0.18371304869651794, "learning_rate": 1.4909405394886867e-06, "loss": 0.459, "step": 10912 }, { "epoch": 2.762085547962541, "grad_norm": 0.18508762121200562, "learning_rate": 1.4896830016963271e-06, "loss": 0.4331, "step": 10913 }, { "epoch": 2.762338648443432, "grad_norm": 0.18821537494659424, "learning_rate": 1.4884259517823906e-06, "loss": 0.4595, "step": 10914 }, { "epoch": 2.762591748924323, "grad_norm": 0.18654337525367737, "learning_rate": 1.4871693898189377e-06, "loss": 0.4408, "step": 10915 }, { "epoch": 2.762844849405214, "grad_norm": 0.18985654413700104, "learning_rate": 1.4859133158780036e-06, "loss": 0.4403, "step": 10916 }, { "epoch": 2.763097949886105, "grad_norm": 0.18780626356601715, "learning_rate": 1.4846577300316e-06, "loss": 0.4557, "step": 10917 }, { "epoch": 2.7633510503669956, "grad_norm": 0.19001948833465576, "learning_rate": 1.483402632351707e-06, "loss": 0.457, "step": 10918 }, { "epoch": 2.7636041508478866, "grad_norm": 0.18288443982601166, "learning_rate": 1.482148022910277e-06, "loss": 0.4386, "step": 10919 }, { "epoch": 2.7638572513287776, "grad_norm": 0.18867386877536774, "learning_rate": 1.4808939017792302e-06, "loss": 0.4683, "step": 10920 }, { "epoch": 2.7641103518096686, "grad_norm": 0.1906137466430664, "learning_rate": 1.4796402690304644e-06, "loss": 0.4426, "step": 10921 }, { "epoch": 2.764363452290559, "grad_norm": 0.19389931857585907, "learning_rate": 1.478387124735846e-06, "loss": 0.4368, "step": 10922 }, { "epoch": 2.76461655277145, "grad_norm": 0.19205188751220703, "learning_rate": 1.4771344689672196e-06, "loss": 0.4747, "step": 10923 }, { "epoch": 2.764869653252341, "grad_norm": 0.18399591743946075, "learning_rate": 1.4758823017963965e-06, "loss": 0.4505, "step": 10924 }, { "epoch": 2.765122753733232, "grad_norm": 0.18558895587921143, "learning_rate": 1.4746306232951601e-06, "loss": 0.4681, "step": 10925 }, { "epoch": 2.765375854214123, "grad_norm": 0.18887269496917725, "learning_rate": 1.4733794335352636e-06, "loss": 0.4533, "step": 10926 }, { "epoch": 2.765628954695014, "grad_norm": 0.1919623166322708, "learning_rate": 1.472128732588437e-06, "loss": 0.4782, "step": 10927 }, { "epoch": 2.7658820551759047, "grad_norm": 0.18892572820186615, "learning_rate": 1.470878520526381e-06, "loss": 0.4539, "step": 10928 }, { "epoch": 2.7661351556567957, "grad_norm": 0.18610092997550964, "learning_rate": 1.4696287974207645e-06, "loss": 0.4313, "step": 10929 }, { "epoch": 2.7663882561376867, "grad_norm": 0.18788711726665497, "learning_rate": 1.4683795633432386e-06, "loss": 0.4643, "step": 10930 }, { "epoch": 2.7666413566185777, "grad_norm": 0.19075854122638702, "learning_rate": 1.467130818365412e-06, "loss": 0.4505, "step": 10931 }, { "epoch": 2.7668944570994682, "grad_norm": 0.18860667943954468, "learning_rate": 1.4658825625588758e-06, "loss": 0.4426, "step": 10932 }, { "epoch": 2.7671475575803592, "grad_norm": 0.19034750759601593, "learning_rate": 1.464634795995189e-06, "loss": 0.4785, "step": 10933 }, { "epoch": 2.7674006580612502, "grad_norm": 0.19231627881526947, "learning_rate": 1.4633875187458846e-06, "loss": 0.4593, "step": 10934 }, { "epoch": 2.7676537585421412, "grad_norm": 0.19971466064453125, "learning_rate": 1.462140730882462e-06, "loss": 0.4649, "step": 10935 }, { "epoch": 2.7679068590230322, "grad_norm": 0.6298444271087646, "learning_rate": 1.4608944324764063e-06, "loss": 0.456, "step": 10936 }, { "epoch": 2.7681599595039232, "grad_norm": 0.18789252638816833, "learning_rate": 1.4596486235991558e-06, "loss": 0.4555, "step": 10937 }, { "epoch": 2.7684130599848142, "grad_norm": 0.19229656457901, "learning_rate": 1.4584033043221347e-06, "loss": 0.4721, "step": 10938 }, { "epoch": 2.768666160465705, "grad_norm": 0.19054825603961945, "learning_rate": 1.4571584747167323e-06, "loss": 0.43, "step": 10939 }, { "epoch": 2.768919260946596, "grad_norm": 0.18560415506362915, "learning_rate": 1.4559141348543138e-06, "loss": 0.4566, "step": 10940 }, { "epoch": 2.769172361427487, "grad_norm": 0.1866072714328766, "learning_rate": 1.4546702848062134e-06, "loss": 0.4508, "step": 10941 }, { "epoch": 2.7694254619083774, "grad_norm": 0.19314908981323242, "learning_rate": 1.4534269246437393e-06, "loss": 0.457, "step": 10942 }, { "epoch": 2.7696785623892684, "grad_norm": 0.18126416206359863, "learning_rate": 1.4521840544381704e-06, "loss": 0.4126, "step": 10943 }, { "epoch": 2.7699316628701594, "grad_norm": 0.18891564011573792, "learning_rate": 1.4509416742607563e-06, "loss": 0.4643, "step": 10944 }, { "epoch": 2.7701847633510503, "grad_norm": 0.18496590852737427, "learning_rate": 1.4496997841827231e-06, "loss": 0.4272, "step": 10945 }, { "epoch": 2.7704378638319413, "grad_norm": 0.18690958619117737, "learning_rate": 1.448458384275263e-06, "loss": 0.4615, "step": 10946 }, { "epoch": 2.7706909643128323, "grad_norm": 0.17750471830368042, "learning_rate": 1.4472174746095435e-06, "loss": 0.4518, "step": 10947 }, { "epoch": 2.7709440647937233, "grad_norm": 0.1863979548215866, "learning_rate": 1.4459770552567042e-06, "loss": 0.4497, "step": 10948 }, { "epoch": 2.771197165274614, "grad_norm": 0.18597222864627838, "learning_rate": 1.4447371262878562e-06, "loss": 0.4629, "step": 10949 }, { "epoch": 2.771450265755505, "grad_norm": 0.1844824254512787, "learning_rate": 1.4434976877740793e-06, "loss": 0.4514, "step": 10950 }, { "epoch": 2.771703366236396, "grad_norm": 0.1853698492050171, "learning_rate": 1.4422587397864308e-06, "loss": 0.4395, "step": 10951 }, { "epoch": 2.771956466717287, "grad_norm": 0.18914639949798584, "learning_rate": 1.4410202823959351e-06, "loss": 0.4633, "step": 10952 }, { "epoch": 2.7722095671981775, "grad_norm": 0.18213531374931335, "learning_rate": 1.4397823156735901e-06, "loss": 0.4515, "step": 10953 }, { "epoch": 2.7724626676790685, "grad_norm": 0.1883242428302765, "learning_rate": 1.4385448396903678e-06, "loss": 0.4547, "step": 10954 }, { "epoch": 2.7727157681599595, "grad_norm": 0.18458864092826843, "learning_rate": 1.4373078545172092e-06, "loss": 0.4489, "step": 10955 }, { "epoch": 2.7729688686408505, "grad_norm": 0.18344564735889435, "learning_rate": 1.4360713602250266e-06, "loss": 0.4581, "step": 10956 }, { "epoch": 2.7732219691217415, "grad_norm": 0.193887397646904, "learning_rate": 1.4348353568847072e-06, "loss": 0.4404, "step": 10957 }, { "epoch": 2.7734750696026325, "grad_norm": 0.19295766949653625, "learning_rate": 1.4335998445671106e-06, "loss": 0.4573, "step": 10958 }, { "epoch": 2.773728170083523, "grad_norm": 0.1846502274274826, "learning_rate": 1.4323648233430609e-06, "loss": 0.4301, "step": 10959 }, { "epoch": 2.773981270564414, "grad_norm": 0.1990244835615158, "learning_rate": 1.4311302932833592e-06, "loss": 0.4365, "step": 10960 }, { "epoch": 2.774234371045305, "grad_norm": 0.19063635170459747, "learning_rate": 1.4298962544587846e-06, "loss": 0.4304, "step": 10961 }, { "epoch": 2.774487471526196, "grad_norm": 0.18524616956710815, "learning_rate": 1.4286627069400772e-06, "loss": 0.4737, "step": 10962 }, { "epoch": 2.7747405720070866, "grad_norm": 0.18060976266860962, "learning_rate": 1.4274296507979557e-06, "loss": 0.4347, "step": 10963 }, { "epoch": 2.7749936724879776, "grad_norm": 0.1903776377439499, "learning_rate": 1.4261970861031093e-06, "loss": 0.4604, "step": 10964 }, { "epoch": 2.7752467729688686, "grad_norm": 0.18797464668750763, "learning_rate": 1.424965012926195e-06, "loss": 0.4643, "step": 10965 }, { "epoch": 2.7754998734497596, "grad_norm": 0.190491184592247, "learning_rate": 1.4237334313378449e-06, "loss": 0.4458, "step": 10966 }, { "epoch": 2.7757529739306506, "grad_norm": 0.1865459680557251, "learning_rate": 1.422502341408667e-06, "loss": 0.4548, "step": 10967 }, { "epoch": 2.7760060744115416, "grad_norm": 0.20209170877933502, "learning_rate": 1.4212717432092338e-06, "loss": 0.4503, "step": 10968 }, { "epoch": 2.7762591748924326, "grad_norm": 0.19127048552036285, "learning_rate": 1.4200416368100979e-06, "loss": 0.4729, "step": 10969 }, { "epoch": 2.776512275373323, "grad_norm": 0.19148972630500793, "learning_rate": 1.4188120222817713e-06, "loss": 0.4559, "step": 10970 }, { "epoch": 2.776765375854214, "grad_norm": 0.1815030872821808, "learning_rate": 1.4175828996947494e-06, "loss": 0.4439, "step": 10971 }, { "epoch": 2.777018476335105, "grad_norm": 0.1920109987258911, "learning_rate": 1.4163542691194942e-06, "loss": 0.4441, "step": 10972 }, { "epoch": 2.7772715768159957, "grad_norm": 0.1884557157754898, "learning_rate": 1.4151261306264386e-06, "loss": 0.432, "step": 10973 }, { "epoch": 2.7775246772968867, "grad_norm": 0.1915397346019745, "learning_rate": 1.4138984842859938e-06, "loss": 0.4555, "step": 10974 }, { "epoch": 2.7777777777777777, "grad_norm": 0.18526573479175568, "learning_rate": 1.412671330168537e-06, "loss": 0.4614, "step": 10975 }, { "epoch": 2.7780308782586687, "grad_norm": 0.18279209733009338, "learning_rate": 1.411444668344415e-06, "loss": 0.4601, "step": 10976 }, { "epoch": 2.7782839787395597, "grad_norm": 0.18438994884490967, "learning_rate": 1.4102184988839528e-06, "loss": 0.4441, "step": 10977 }, { "epoch": 2.7785370792204507, "grad_norm": 0.18696768581867218, "learning_rate": 1.4089928218574422e-06, "loss": 0.4549, "step": 10978 }, { "epoch": 2.7787901797013417, "grad_norm": 0.18489545583724976, "learning_rate": 1.4077676373351467e-06, "loss": 0.4466, "step": 10979 }, { "epoch": 2.7790432801822322, "grad_norm": 0.1922905147075653, "learning_rate": 1.4065429453873124e-06, "loss": 0.4382, "step": 10980 }, { "epoch": 2.7792963806631232, "grad_norm": 0.1886759251356125, "learning_rate": 1.405318746084139e-06, "loss": 0.4321, "step": 10981 }, { "epoch": 2.7795494811440142, "grad_norm": 0.18610544502735138, "learning_rate": 1.4040950394958109e-06, "loss": 0.4449, "step": 10982 }, { "epoch": 2.7798025816249052, "grad_norm": 0.1869025081396103, "learning_rate": 1.40287182569248e-06, "loss": 0.4718, "step": 10983 }, { "epoch": 2.780055682105796, "grad_norm": 0.18670471012592316, "learning_rate": 1.4016491047442704e-06, "loss": 0.4618, "step": 10984 }, { "epoch": 2.780308782586687, "grad_norm": 0.18935652077198029, "learning_rate": 1.400426876721278e-06, "loss": 0.4501, "step": 10985 }, { "epoch": 2.780561883067578, "grad_norm": 0.18288712203502655, "learning_rate": 1.3992051416935715e-06, "loss": 0.4475, "step": 10986 }, { "epoch": 2.780814983548469, "grad_norm": 0.18381522595882416, "learning_rate": 1.3979838997311901e-06, "loss": 0.4337, "step": 10987 }, { "epoch": 2.78106808402936, "grad_norm": 0.2252303510904312, "learning_rate": 1.3967631509041446e-06, "loss": 0.4476, "step": 10988 }, { "epoch": 2.781321184510251, "grad_norm": 0.18752510845661163, "learning_rate": 1.395542895282419e-06, "loss": 0.4372, "step": 10989 }, { "epoch": 2.7815742849911413, "grad_norm": 0.18342073261737823, "learning_rate": 1.3943231329359663e-06, "loss": 0.4668, "step": 10990 }, { "epoch": 2.7818273854720323, "grad_norm": 0.1905638426542282, "learning_rate": 1.3931038639347127e-06, "loss": 0.4566, "step": 10991 }, { "epoch": 2.7820804859529233, "grad_norm": 0.1884710043668747, "learning_rate": 1.391885088348558e-06, "loss": 0.4566, "step": 10992 }, { "epoch": 2.7823335864338143, "grad_norm": 0.185530886054039, "learning_rate": 1.3906668062473717e-06, "loss": 0.4587, "step": 10993 }, { "epoch": 2.782586686914705, "grad_norm": 0.19753186404705048, "learning_rate": 1.3894490177009945e-06, "loss": 0.4562, "step": 10994 }, { "epoch": 2.782839787395596, "grad_norm": 0.1823217123746872, "learning_rate": 1.3882317227792409e-06, "loss": 0.4659, "step": 10995 }, { "epoch": 2.783092887876487, "grad_norm": 0.1898585557937622, "learning_rate": 1.3870149215518935e-06, "loss": 0.4729, "step": 10996 }, { "epoch": 2.783345988357378, "grad_norm": 0.19005152583122253, "learning_rate": 1.3857986140887114e-06, "loss": 0.4829, "step": 10997 }, { "epoch": 2.783599088838269, "grad_norm": 0.1870080977678299, "learning_rate": 1.384582800459422e-06, "loss": 0.4655, "step": 10998 }, { "epoch": 2.78385218931916, "grad_norm": 0.19082500040531158, "learning_rate": 1.3833674807337249e-06, "loss": 0.4528, "step": 10999 }, { "epoch": 2.784105289800051, "grad_norm": 0.18927952647209167, "learning_rate": 1.3821526549812924e-06, "loss": 0.4912, "step": 11000 }, { "epoch": 2.7843583902809415, "grad_norm": 0.18444593250751495, "learning_rate": 1.3809383232717665e-06, "loss": 0.4638, "step": 11001 }, { "epoch": 2.7846114907618325, "grad_norm": 0.18928617238998413, "learning_rate": 1.379724485674767e-06, "loss": 0.4345, "step": 11002 }, { "epoch": 2.7848645912427235, "grad_norm": 0.1844750940799713, "learning_rate": 1.378511142259874e-06, "loss": 0.4672, "step": 11003 }, { "epoch": 2.785117691723614, "grad_norm": 0.18815551698207855, "learning_rate": 1.3772982930966473e-06, "loss": 0.4781, "step": 11004 }, { "epoch": 2.785370792204505, "grad_norm": 0.19176332652568817, "learning_rate": 1.376085938254621e-06, "loss": 0.4535, "step": 11005 }, { "epoch": 2.785623892685396, "grad_norm": 0.18349216878414154, "learning_rate": 1.3748740778032943e-06, "loss": 0.4764, "step": 11006 }, { "epoch": 2.785876993166287, "grad_norm": 0.18249382078647614, "learning_rate": 1.3736627118121403e-06, "loss": 0.4349, "step": 11007 }, { "epoch": 2.786130093647178, "grad_norm": 0.1905175745487213, "learning_rate": 1.372451840350607e-06, "loss": 0.4471, "step": 11008 }, { "epoch": 2.786383194128069, "grad_norm": 0.18768836557865143, "learning_rate": 1.3712414634881055e-06, "loss": 0.4649, "step": 11009 }, { "epoch": 2.78663629460896, "grad_norm": 0.1953352987766266, "learning_rate": 1.3700315812940258e-06, "loss": 0.4668, "step": 11010 }, { "epoch": 2.7868893950898506, "grad_norm": 0.19036202132701874, "learning_rate": 1.3688221938377322e-06, "loss": 0.4562, "step": 11011 }, { "epoch": 2.7871424955707416, "grad_norm": 0.19108903408050537, "learning_rate": 1.3676133011885518e-06, "loss": 0.4529, "step": 11012 }, { "epoch": 2.7873955960516326, "grad_norm": 0.18244487047195435, "learning_rate": 1.3664049034157922e-06, "loss": 0.4338, "step": 11013 }, { "epoch": 2.7876486965325236, "grad_norm": 0.1889238953590393, "learning_rate": 1.3651970005887239e-06, "loss": 0.4512, "step": 11014 }, { "epoch": 2.787901797013414, "grad_norm": 0.19064153730869293, "learning_rate": 1.3639895927765945e-06, "loss": 0.4418, "step": 11015 }, { "epoch": 2.788154897494305, "grad_norm": 0.1882340908050537, "learning_rate": 1.3627826800486232e-06, "loss": 0.4468, "step": 11016 }, { "epoch": 2.788407997975196, "grad_norm": 0.1888623833656311, "learning_rate": 1.361576262473997e-06, "loss": 0.4475, "step": 11017 }, { "epoch": 2.788661098456087, "grad_norm": 0.1855662763118744, "learning_rate": 1.3603703401218816e-06, "loss": 0.4274, "step": 11018 }, { "epoch": 2.788914198936978, "grad_norm": 0.191093310713768, "learning_rate": 1.3591649130614094e-06, "loss": 0.4745, "step": 11019 }, { "epoch": 2.789167299417869, "grad_norm": 0.1928730607032776, "learning_rate": 1.3579599813616817e-06, "loss": 0.4586, "step": 11020 }, { "epoch": 2.7894203998987597, "grad_norm": 0.1883280724287033, "learning_rate": 1.356755545091777e-06, "loss": 0.4435, "step": 11021 }, { "epoch": 2.7896735003796507, "grad_norm": 0.1953514814376831, "learning_rate": 1.3555516043207417e-06, "loss": 0.458, "step": 11022 }, { "epoch": 2.7899266008605417, "grad_norm": 0.1894192397594452, "learning_rate": 1.3543481591175934e-06, "loss": 0.4425, "step": 11023 }, { "epoch": 2.7901797013414327, "grad_norm": 0.18851371109485626, "learning_rate": 1.3531452095513287e-06, "loss": 0.4549, "step": 11024 }, { "epoch": 2.7904328018223232, "grad_norm": 0.18884512782096863, "learning_rate": 1.3519427556909082e-06, "loss": 0.4503, "step": 11025 }, { "epoch": 2.7906859023032142, "grad_norm": 0.18735486268997192, "learning_rate": 1.3507407976052634e-06, "loss": 0.4395, "step": 11026 }, { "epoch": 2.7909390027841052, "grad_norm": 0.18629670143127441, "learning_rate": 1.3495393353633013e-06, "loss": 0.4585, "step": 11027 }, { "epoch": 2.7911921032649962, "grad_norm": 0.18756186962127686, "learning_rate": 1.3483383690339002e-06, "loss": 0.4488, "step": 11028 }, { "epoch": 2.7914452037458872, "grad_norm": 0.1857071816921234, "learning_rate": 1.3471378986859074e-06, "loss": 0.4514, "step": 11029 }, { "epoch": 2.7916983042267782, "grad_norm": 0.18437564373016357, "learning_rate": 1.3459379243881431e-06, "loss": 0.4657, "step": 11030 }, { "epoch": 2.791951404707669, "grad_norm": 0.18528813123703003, "learning_rate": 1.3447384462094015e-06, "loss": 0.4419, "step": 11031 }, { "epoch": 2.79220450518856, "grad_norm": 0.18508575856685638, "learning_rate": 1.343539464218445e-06, "loss": 0.4394, "step": 11032 }, { "epoch": 2.792457605669451, "grad_norm": 0.19116181135177612, "learning_rate": 1.342340978484008e-06, "loss": 0.4621, "step": 11033 }, { "epoch": 2.792710706150342, "grad_norm": 0.18507708609104156, "learning_rate": 1.3411429890747974e-06, "loss": 0.455, "step": 11034 }, { "epoch": 2.7929638066312323, "grad_norm": 0.19038979709148407, "learning_rate": 1.3399454960594926e-06, "loss": 0.4718, "step": 11035 }, { "epoch": 2.7932169071121233, "grad_norm": 0.18680323660373688, "learning_rate": 1.3387484995067413e-06, "loss": 0.4373, "step": 11036 }, { "epoch": 2.7934700075930143, "grad_norm": 0.2012946605682373, "learning_rate": 1.337551999485167e-06, "loss": 0.4434, "step": 11037 }, { "epoch": 2.7937231080739053, "grad_norm": 0.18453605473041534, "learning_rate": 1.3363559960633609e-06, "loss": 0.4503, "step": 11038 }, { "epoch": 2.7939762085547963, "grad_norm": 0.19156718254089355, "learning_rate": 1.335160489309888e-06, "loss": 0.4493, "step": 11039 }, { "epoch": 2.7942293090356873, "grad_norm": 0.1864934116601944, "learning_rate": 1.3339654792932844e-06, "loss": 0.4597, "step": 11040 }, { "epoch": 2.7944824095165783, "grad_norm": 0.18247027695178986, "learning_rate": 1.3327709660820564e-06, "loss": 0.443, "step": 11041 }, { "epoch": 2.794735509997469, "grad_norm": 0.18562853336334229, "learning_rate": 1.331576949744684e-06, "loss": 0.424, "step": 11042 }, { "epoch": 2.79498861047836, "grad_norm": 0.191756010055542, "learning_rate": 1.3303834303496167e-06, "loss": 0.4589, "step": 11043 }, { "epoch": 2.795241710959251, "grad_norm": 0.1919548362493515, "learning_rate": 1.3291904079652774e-06, "loss": 0.4663, "step": 11044 }, { "epoch": 2.795494811440142, "grad_norm": 0.18442945182323456, "learning_rate": 1.3279978826600593e-06, "loss": 0.4491, "step": 11045 }, { "epoch": 2.7957479119210324, "grad_norm": 0.18415629863739014, "learning_rate": 1.3268058545023265e-06, "loss": 0.4481, "step": 11046 }, { "epoch": 2.7960010124019234, "grad_norm": 0.18942591547966003, "learning_rate": 1.325614323560419e-06, "loss": 0.4583, "step": 11047 }, { "epoch": 2.7962541128828144, "grad_norm": 0.1907377690076828, "learning_rate": 1.3244232899026365e-06, "loss": 0.444, "step": 11048 }, { "epoch": 2.7965072133637054, "grad_norm": 0.19459204375743866, "learning_rate": 1.3232327535972667e-06, "loss": 0.4677, "step": 11049 }, { "epoch": 2.7967603138445964, "grad_norm": 0.1906851828098297, "learning_rate": 1.3220427147125582e-06, "loss": 0.4687, "step": 11050 }, { "epoch": 2.7970134143254874, "grad_norm": 0.2212270349264145, "learning_rate": 1.3208531733167319e-06, "loss": 0.4576, "step": 11051 }, { "epoch": 2.797266514806378, "grad_norm": 0.186470165848732, "learning_rate": 1.3196641294779854e-06, "loss": 0.4729, "step": 11052 }, { "epoch": 2.797519615287269, "grad_norm": 0.1927388608455658, "learning_rate": 1.318475583264478e-06, "loss": 0.439, "step": 11053 }, { "epoch": 2.79777271576816, "grad_norm": 0.19385096430778503, "learning_rate": 1.3172875347443482e-06, "loss": 0.4464, "step": 11054 }, { "epoch": 2.798025816249051, "grad_norm": 0.18890434503555298, "learning_rate": 1.316099983985708e-06, "loss": 0.4714, "step": 11055 }, { "epoch": 2.7982789167299416, "grad_norm": 0.1835601031780243, "learning_rate": 1.3149129310566355e-06, "loss": 0.4156, "step": 11056 }, { "epoch": 2.7985320172108326, "grad_norm": 0.18642666935920715, "learning_rate": 1.3137263760251816e-06, "loss": 0.4446, "step": 11057 }, { "epoch": 2.7987851176917236, "grad_norm": 0.1948690116405487, "learning_rate": 1.312540318959371e-06, "loss": 0.4442, "step": 11058 }, { "epoch": 2.7990382181726146, "grad_norm": 0.18756484985351562, "learning_rate": 1.3113547599271925e-06, "loss": 0.4465, "step": 11059 }, { "epoch": 2.7992913186535056, "grad_norm": 0.18710601329803467, "learning_rate": 1.3101696989966162e-06, "loss": 0.4476, "step": 11060 }, { "epoch": 2.7995444191343966, "grad_norm": 0.19010700285434723, "learning_rate": 1.3089851362355744e-06, "loss": 0.4723, "step": 11061 }, { "epoch": 2.799797519615287, "grad_norm": 0.19648098945617676, "learning_rate": 1.3078010717119816e-06, "loss": 0.4574, "step": 11062 }, { "epoch": 2.800050620096178, "grad_norm": 0.18497367203235626, "learning_rate": 1.306617505493718e-06, "loss": 0.4595, "step": 11063 }, { "epoch": 2.800303720577069, "grad_norm": 0.19121134281158447, "learning_rate": 1.3054344376486294e-06, "loss": 0.4389, "step": 11064 }, { "epoch": 2.80055682105796, "grad_norm": 0.1884251832962036, "learning_rate": 1.3042518682445404e-06, "loss": 0.4313, "step": 11065 }, { "epoch": 2.8008099215388507, "grad_norm": 0.1830429881811142, "learning_rate": 1.3030697973492468e-06, "loss": 0.4497, "step": 11066 }, { "epoch": 2.8010630220197417, "grad_norm": 0.18677183985710144, "learning_rate": 1.3018882250305132e-06, "loss": 0.4936, "step": 11067 }, { "epoch": 2.8013161225006327, "grad_norm": 0.20277813076972961, "learning_rate": 1.3007071513560754e-06, "loss": 0.4499, "step": 11068 }, { "epoch": 2.8015692229815237, "grad_norm": 0.18529339134693146, "learning_rate": 1.2995265763936472e-06, "loss": 0.4684, "step": 11069 }, { "epoch": 2.8018223234624147, "grad_norm": 0.19683142006397247, "learning_rate": 1.298346500210902e-06, "loss": 0.4991, "step": 11070 }, { "epoch": 2.8020754239433057, "grad_norm": 0.18621821701526642, "learning_rate": 1.2971669228754936e-06, "loss": 0.4518, "step": 11071 }, { "epoch": 2.8023285244241967, "grad_norm": 0.19738830626010895, "learning_rate": 1.2959878444550444e-06, "loss": 0.4618, "step": 11072 }, { "epoch": 2.802581624905087, "grad_norm": 0.19100312888622284, "learning_rate": 1.2948092650171496e-06, "loss": 0.4763, "step": 11073 }, { "epoch": 2.802834725385978, "grad_norm": 0.2031887024641037, "learning_rate": 1.2936311846293737e-06, "loss": 0.4596, "step": 11074 }, { "epoch": 2.803087825866869, "grad_norm": 0.18520525097846985, "learning_rate": 1.2924536033592538e-06, "loss": 0.4639, "step": 11075 }, { "epoch": 2.80334092634776, "grad_norm": 0.17885006964206696, "learning_rate": 1.2912765212742973e-06, "loss": 0.4593, "step": 11076 }, { "epoch": 2.8035940268286508, "grad_norm": 0.19054262340068817, "learning_rate": 1.2900999384419843e-06, "loss": 0.4503, "step": 11077 }, { "epoch": 2.8038471273095418, "grad_norm": 0.18256668746471405, "learning_rate": 1.2889238549297667e-06, "loss": 0.4221, "step": 11078 }, { "epoch": 2.8041002277904328, "grad_norm": 0.1862882524728775, "learning_rate": 1.287748270805066e-06, "loss": 0.4353, "step": 11079 }, { "epoch": 2.8043533282713238, "grad_norm": 0.1890057474374771, "learning_rate": 1.2865731861352758e-06, "loss": 0.4336, "step": 11080 }, { "epoch": 2.8046064287522148, "grad_norm": 0.18982133269309998, "learning_rate": 1.2853986009877628e-06, "loss": 0.4575, "step": 11081 }, { "epoch": 2.8048595292331058, "grad_norm": 0.1865515112876892, "learning_rate": 1.2842245154298606e-06, "loss": 0.4461, "step": 11082 }, { "epoch": 2.8051126297139963, "grad_norm": 0.18420717120170593, "learning_rate": 1.2830509295288796e-06, "loss": 0.4589, "step": 11083 }, { "epoch": 2.8053657301948873, "grad_norm": 0.19060923159122467, "learning_rate": 1.2818778433520984e-06, "loss": 0.4319, "step": 11084 }, { "epoch": 2.8056188306757783, "grad_norm": 0.18909884989261627, "learning_rate": 1.2807052569667666e-06, "loss": 0.4447, "step": 11085 }, { "epoch": 2.8058719311566693, "grad_norm": 0.18650196492671967, "learning_rate": 1.2795331704401059e-06, "loss": 0.4626, "step": 11086 }, { "epoch": 2.80612503163756, "grad_norm": 0.19374486804008484, "learning_rate": 1.2783615838393116e-06, "loss": 0.4712, "step": 11087 }, { "epoch": 2.806378132118451, "grad_norm": 0.1965380162000656, "learning_rate": 1.2771904972315464e-06, "loss": 0.4484, "step": 11088 }, { "epoch": 2.806631232599342, "grad_norm": 0.18950144946575165, "learning_rate": 1.2760199106839467e-06, "loss": 0.4374, "step": 11089 }, { "epoch": 2.806884333080233, "grad_norm": 0.1815137416124344, "learning_rate": 1.27484982426362e-06, "loss": 0.442, "step": 11090 }, { "epoch": 2.807137433561124, "grad_norm": 0.19002990424633026, "learning_rate": 1.2736802380376467e-06, "loss": 0.4479, "step": 11091 }, { "epoch": 2.807390534042015, "grad_norm": 0.1871078461408615, "learning_rate": 1.2725111520730705e-06, "loss": 0.4562, "step": 11092 }, { "epoch": 2.8076436345229054, "grad_norm": 0.18267890810966492, "learning_rate": 1.2713425664369195e-06, "loss": 0.4436, "step": 11093 }, { "epoch": 2.8078967350037964, "grad_norm": 0.20352882146835327, "learning_rate": 1.2701744811961826e-06, "loss": 0.4444, "step": 11094 }, { "epoch": 2.8081498354846874, "grad_norm": 0.18186329305171967, "learning_rate": 1.269006896417825e-06, "loss": 0.4554, "step": 11095 }, { "epoch": 2.8084029359655784, "grad_norm": 0.1984749734401703, "learning_rate": 1.2678398121687819e-06, "loss": 0.4584, "step": 11096 }, { "epoch": 2.808656036446469, "grad_norm": 0.18923822045326233, "learning_rate": 1.2666732285159612e-06, "loss": 0.471, "step": 11097 }, { "epoch": 2.80890913692736, "grad_norm": 0.19345006346702576, "learning_rate": 1.265507145526237e-06, "loss": 0.4551, "step": 11098 }, { "epoch": 2.809162237408251, "grad_norm": 0.18992632627487183, "learning_rate": 1.2643415632664569e-06, "loss": 0.4449, "step": 11099 }, { "epoch": 2.809415337889142, "grad_norm": 0.19036619365215302, "learning_rate": 1.2631764818034475e-06, "loss": 0.4547, "step": 11100 }, { "epoch": 2.809668438370033, "grad_norm": 0.1906251311302185, "learning_rate": 1.2620119012039977e-06, "loss": 0.4792, "step": 11101 }, { "epoch": 2.809921538850924, "grad_norm": 0.1877388060092926, "learning_rate": 1.2608478215348719e-06, "loss": 0.4497, "step": 11102 }, { "epoch": 2.810174639331815, "grad_norm": 0.19256196916103363, "learning_rate": 1.2596842428628009e-06, "loss": 0.4596, "step": 11103 }, { "epoch": 2.8104277398127055, "grad_norm": 0.1879103034734726, "learning_rate": 1.2585211652544916e-06, "loss": 0.4329, "step": 11104 }, { "epoch": 2.8106808402935965, "grad_norm": 0.1838119477033615, "learning_rate": 1.2573585887766182e-06, "loss": 0.4338, "step": 11105 }, { "epoch": 2.8109339407744875, "grad_norm": 0.1856662780046463, "learning_rate": 1.2561965134958354e-06, "loss": 0.4414, "step": 11106 }, { "epoch": 2.8111870412553785, "grad_norm": 0.19477665424346924, "learning_rate": 1.2550349394787575e-06, "loss": 0.469, "step": 11107 }, { "epoch": 2.811440141736269, "grad_norm": 0.18354760110378265, "learning_rate": 1.2538738667919792e-06, "loss": 0.4593, "step": 11108 }, { "epoch": 2.81169324221716, "grad_norm": 0.19335149228572845, "learning_rate": 1.252713295502056e-06, "loss": 0.4603, "step": 11109 }, { "epoch": 2.811946342698051, "grad_norm": 0.18631719052791595, "learning_rate": 1.2515532256755258e-06, "loss": 0.4591, "step": 11110 }, { "epoch": 2.812199443178942, "grad_norm": 0.18292118608951569, "learning_rate": 1.250393657378891e-06, "loss": 0.4453, "step": 11111 }, { "epoch": 2.812452543659833, "grad_norm": 0.18701626360416412, "learning_rate": 1.2492345906786262e-06, "loss": 0.4679, "step": 11112 }, { "epoch": 2.812705644140724, "grad_norm": 0.19324558973312378, "learning_rate": 1.248076025641184e-06, "loss": 0.4679, "step": 11113 }, { "epoch": 2.8129587446216147, "grad_norm": 0.1860673725605011, "learning_rate": 1.2469179623329752e-06, "loss": 0.4549, "step": 11114 }, { "epoch": 2.8132118451025057, "grad_norm": 0.18659867346286774, "learning_rate": 1.2457604008203928e-06, "loss": 0.4612, "step": 11115 }, { "epoch": 2.8134649455833967, "grad_norm": 0.19121600687503815, "learning_rate": 1.2446033411697978e-06, "loss": 0.4648, "step": 11116 }, { "epoch": 2.8137180460642877, "grad_norm": 0.19029107689857483, "learning_rate": 1.2434467834475195e-06, "loss": 0.4407, "step": 11117 }, { "epoch": 2.813971146545178, "grad_norm": 0.18400447070598602, "learning_rate": 1.2422907277198604e-06, "loss": 0.455, "step": 11118 }, { "epoch": 2.814224247026069, "grad_norm": 0.1901925951242447, "learning_rate": 1.241135174053102e-06, "loss": 0.4553, "step": 11119 }, { "epoch": 2.81447734750696, "grad_norm": 0.18710263073444366, "learning_rate": 1.2399801225134812e-06, "loss": 0.4577, "step": 11120 }, { "epoch": 2.814730447987851, "grad_norm": 0.18605773150920868, "learning_rate": 1.2388255731672172e-06, "loss": 0.4462, "step": 11121 }, { "epoch": 2.814983548468742, "grad_norm": 0.19053353369235992, "learning_rate": 1.2376715260804995e-06, "loss": 0.4578, "step": 11122 }, { "epoch": 2.815236648949633, "grad_norm": 0.18671493232250214, "learning_rate": 1.236517981319486e-06, "loss": 0.4338, "step": 11123 }, { "epoch": 2.8154897494305238, "grad_norm": 0.18934518098831177, "learning_rate": 1.2353649389503076e-06, "loss": 0.4571, "step": 11124 }, { "epoch": 2.8157428499114148, "grad_norm": 0.1819983422756195, "learning_rate": 1.2342123990390653e-06, "loss": 0.4332, "step": 11125 }, { "epoch": 2.8159959503923058, "grad_norm": 0.18853946030139923, "learning_rate": 1.233060361651831e-06, "loss": 0.4465, "step": 11126 }, { "epoch": 2.8162490508731968, "grad_norm": 0.19163638353347778, "learning_rate": 1.2319088268546497e-06, "loss": 0.4751, "step": 11127 }, { "epoch": 2.8165021513540873, "grad_norm": 0.1842377632856369, "learning_rate": 1.2307577947135362e-06, "loss": 0.4467, "step": 11128 }, { "epoch": 2.8167552518349783, "grad_norm": 0.18252646923065186, "learning_rate": 1.2296072652944768e-06, "loss": 0.4452, "step": 11129 }, { "epoch": 2.8170083523158693, "grad_norm": 0.1924142837524414, "learning_rate": 1.2284572386634287e-06, "loss": 0.4774, "step": 11130 }, { "epoch": 2.8172614527967603, "grad_norm": 0.19199895858764648, "learning_rate": 1.22730771488632e-06, "loss": 0.443, "step": 11131 }, { "epoch": 2.8175145532776513, "grad_norm": 0.1847301721572876, "learning_rate": 1.2261586940290514e-06, "loss": 0.4479, "step": 11132 }, { "epoch": 2.8177676537585423, "grad_norm": 0.19164316356182098, "learning_rate": 1.225010176157494e-06, "loss": 0.4511, "step": 11133 }, { "epoch": 2.8180207542394333, "grad_norm": 0.1874951273202896, "learning_rate": 1.223862161337488e-06, "loss": 0.4839, "step": 11134 }, { "epoch": 2.818273854720324, "grad_norm": 0.18343111872673035, "learning_rate": 1.2227146496348508e-06, "loss": 0.4608, "step": 11135 }, { "epoch": 2.818526955201215, "grad_norm": 0.19471783936023712, "learning_rate": 1.2215676411153599e-06, "loss": 0.4708, "step": 11136 }, { "epoch": 2.818780055682106, "grad_norm": 0.19876518845558167, "learning_rate": 1.220421135844777e-06, "loss": 0.4381, "step": 11137 }, { "epoch": 2.8190331561629964, "grad_norm": 0.18970294296741486, "learning_rate": 1.2192751338888264e-06, "loss": 0.459, "step": 11138 }, { "epoch": 2.8192862566438874, "grad_norm": 0.1833149492740631, "learning_rate": 1.2181296353132066e-06, "loss": 0.4537, "step": 11139 }, { "epoch": 2.8195393571247784, "grad_norm": 0.18695402145385742, "learning_rate": 1.2169846401835871e-06, "loss": 0.4573, "step": 11140 }, { "epoch": 2.8197924576056694, "grad_norm": 0.18625961244106293, "learning_rate": 1.215840148565608e-06, "loss": 0.4657, "step": 11141 }, { "epoch": 2.8200455580865604, "grad_norm": 0.18994712829589844, "learning_rate": 1.2146961605248786e-06, "loss": 0.4726, "step": 11142 }, { "epoch": 2.8202986585674514, "grad_norm": 0.18880848586559296, "learning_rate": 1.21355267612698e-06, "loss": 0.4751, "step": 11143 }, { "epoch": 2.8205517590483424, "grad_norm": 0.19266840815544128, "learning_rate": 1.2124096954374709e-06, "loss": 0.4548, "step": 11144 }, { "epoch": 2.820804859529233, "grad_norm": 0.1908593326807022, "learning_rate": 1.2112672185218731e-06, "loss": 0.4466, "step": 11145 }, { "epoch": 2.821057960010124, "grad_norm": 0.1992340087890625, "learning_rate": 1.2101252454456825e-06, "loss": 0.4481, "step": 11146 }, { "epoch": 2.821311060491015, "grad_norm": 0.19596900045871735, "learning_rate": 1.2089837762743684e-06, "loss": 0.4537, "step": 11147 }, { "epoch": 2.821564160971906, "grad_norm": 0.18739871680736542, "learning_rate": 1.2078428110733641e-06, "loss": 0.446, "step": 11148 }, { "epoch": 2.8218172614527965, "grad_norm": 0.1836729347705841, "learning_rate": 1.2067023499080788e-06, "loss": 0.4412, "step": 11149 }, { "epoch": 2.8220703619336875, "grad_norm": 0.18439975380897522, "learning_rate": 1.2055623928438975e-06, "loss": 0.4616, "step": 11150 }, { "epoch": 2.8223234624145785, "grad_norm": 0.1880210041999817, "learning_rate": 1.2044229399461694e-06, "loss": 0.4579, "step": 11151 }, { "epoch": 2.8225765628954695, "grad_norm": 0.183010071516037, "learning_rate": 1.2032839912802187e-06, "loss": 0.4259, "step": 11152 }, { "epoch": 2.8228296633763605, "grad_norm": 0.18783988058567047, "learning_rate": 1.2021455469113341e-06, "loss": 0.4746, "step": 11153 }, { "epoch": 2.8230827638572515, "grad_norm": 0.1893616020679474, "learning_rate": 1.2010076069047837e-06, "loss": 0.4453, "step": 11154 }, { "epoch": 2.823335864338142, "grad_norm": 0.18946470320224762, "learning_rate": 1.1998701713258022e-06, "loss": 0.4446, "step": 11155 }, { "epoch": 2.823588964819033, "grad_norm": 0.1842866837978363, "learning_rate": 1.1987332402395936e-06, "loss": 0.4441, "step": 11156 }, { "epoch": 2.823842065299924, "grad_norm": 0.19025447964668274, "learning_rate": 1.1975968137113414e-06, "loss": 0.4537, "step": 11157 }, { "epoch": 2.824095165780815, "grad_norm": 0.19152840971946716, "learning_rate": 1.1964608918061937e-06, "loss": 0.4757, "step": 11158 }, { "epoch": 2.8243482662617057, "grad_norm": 0.18768054246902466, "learning_rate": 1.1953254745892674e-06, "loss": 0.4547, "step": 11159 }, { "epoch": 2.8246013667425967, "grad_norm": 0.1888064593076706, "learning_rate": 1.1941905621256544e-06, "loss": 0.4239, "step": 11160 }, { "epoch": 2.8248544672234877, "grad_norm": 0.180370032787323, "learning_rate": 1.1930561544804164e-06, "loss": 0.4502, "step": 11161 }, { "epoch": 2.8251075677043787, "grad_norm": 0.17990578711032867, "learning_rate": 1.1919222517185858e-06, "loss": 0.4224, "step": 11162 }, { "epoch": 2.8253606681852697, "grad_norm": 0.18893302977085114, "learning_rate": 1.1907888539051727e-06, "loss": 0.4524, "step": 11163 }, { "epoch": 2.8256137686661607, "grad_norm": 0.1900385171175003, "learning_rate": 1.189655961105146e-06, "loss": 0.4917, "step": 11164 }, { "epoch": 2.8258668691470517, "grad_norm": 0.1894540786743164, "learning_rate": 1.1885235733834544e-06, "loss": 0.4494, "step": 11165 }, { "epoch": 2.826119969627942, "grad_norm": 0.22548626363277435, "learning_rate": 1.1873916908050154e-06, "loss": 0.4585, "step": 11166 }, { "epoch": 2.826373070108833, "grad_norm": 0.19986647367477417, "learning_rate": 1.186260313434716e-06, "loss": 0.4659, "step": 11167 }, { "epoch": 2.826626170589724, "grad_norm": 0.203337162733078, "learning_rate": 1.1851294413374171e-06, "loss": 0.4482, "step": 11168 }, { "epoch": 2.8268792710706148, "grad_norm": 0.18913143873214722, "learning_rate": 1.1839990745779495e-06, "loss": 0.4572, "step": 11169 }, { "epoch": 2.8271323715515058, "grad_norm": 0.19323879480361938, "learning_rate": 1.1828692132211128e-06, "loss": 0.4435, "step": 11170 }, { "epoch": 2.8273854720323968, "grad_norm": 0.1900039166212082, "learning_rate": 1.181739857331682e-06, "loss": 0.442, "step": 11171 }, { "epoch": 2.8276385725132878, "grad_norm": 0.2011607140302658, "learning_rate": 1.1806110069743993e-06, "loss": 0.4526, "step": 11172 }, { "epoch": 2.8278916729941788, "grad_norm": 0.18119066953659058, "learning_rate": 1.1794826622139788e-06, "loss": 0.4367, "step": 11173 }, { "epoch": 2.8281447734750698, "grad_norm": 0.18683546781539917, "learning_rate": 1.1783548231151077e-06, "loss": 0.4479, "step": 11174 }, { "epoch": 2.8283978739559608, "grad_norm": 0.189952090382576, "learning_rate": 1.1772274897424418e-06, "loss": 0.4392, "step": 11175 }, { "epoch": 2.8286509744368513, "grad_norm": 0.19355319440364838, "learning_rate": 1.1761006621606075e-06, "loss": 0.4718, "step": 11176 }, { "epoch": 2.8289040749177423, "grad_norm": 0.18744613230228424, "learning_rate": 1.1749743404342063e-06, "loss": 0.4804, "step": 11177 }, { "epoch": 2.8291571753986333, "grad_norm": 0.18710950016975403, "learning_rate": 1.1738485246278042e-06, "loss": 0.4327, "step": 11178 }, { "epoch": 2.8294102758795243, "grad_norm": 0.19023248553276062, "learning_rate": 1.1727232148059452e-06, "loss": 0.4509, "step": 11179 }, { "epoch": 2.829663376360415, "grad_norm": 0.18038491904735565, "learning_rate": 1.171598411033139e-06, "loss": 0.4714, "step": 11180 }, { "epoch": 2.829916476841306, "grad_norm": 0.1836439073085785, "learning_rate": 1.1704741133738683e-06, "loss": 0.4505, "step": 11181 }, { "epoch": 2.830169577322197, "grad_norm": 0.1882273107767105, "learning_rate": 1.1693503218925884e-06, "loss": 0.4488, "step": 11182 }, { "epoch": 2.830422677803088, "grad_norm": 0.18621952831745148, "learning_rate": 1.1682270366537217e-06, "loss": 0.4493, "step": 11183 }, { "epoch": 2.830675778283979, "grad_norm": 0.19073542952537537, "learning_rate": 1.1671042577216641e-06, "loss": 0.4382, "step": 11184 }, { "epoch": 2.83092887876487, "grad_norm": 0.20542630553245544, "learning_rate": 1.1659819851607856e-06, "loss": 0.445, "step": 11185 }, { "epoch": 2.8311819792457604, "grad_norm": 0.18734434247016907, "learning_rate": 1.1648602190354185e-06, "loss": 0.4618, "step": 11186 }, { "epoch": 2.8314350797266514, "grad_norm": 0.19429916143417358, "learning_rate": 1.1637389594098714e-06, "loss": 0.4491, "step": 11187 }, { "epoch": 2.8316881802075424, "grad_norm": 0.19082516431808472, "learning_rate": 1.1626182063484281e-06, "loss": 0.4321, "step": 11188 }, { "epoch": 2.8319412806884334, "grad_norm": 0.19642075896263123, "learning_rate": 1.1614979599153363e-06, "loss": 0.4418, "step": 11189 }, { "epoch": 2.832194381169324, "grad_norm": 0.203606516122818, "learning_rate": 1.1603782201748193e-06, "loss": 0.4397, "step": 11190 }, { "epoch": 2.832447481650215, "grad_norm": 0.19054903090000153, "learning_rate": 1.1592589871910687e-06, "loss": 0.4487, "step": 11191 }, { "epoch": 2.832700582131106, "grad_norm": 0.1866031438112259, "learning_rate": 1.1581402610282456e-06, "loss": 0.4276, "step": 11192 }, { "epoch": 2.832953682611997, "grad_norm": 0.18825265765190125, "learning_rate": 1.157022041750483e-06, "loss": 0.4731, "step": 11193 }, { "epoch": 2.833206783092888, "grad_norm": 0.18601933121681213, "learning_rate": 1.1559043294218908e-06, "loss": 0.4675, "step": 11194 }, { "epoch": 2.833459883573779, "grad_norm": 0.19561991095542908, "learning_rate": 1.1547871241065422e-06, "loss": 0.4503, "step": 11195 }, { "epoch": 2.83371298405467, "grad_norm": 0.19106616079807281, "learning_rate": 1.1536704258684883e-06, "loss": 0.4293, "step": 11196 }, { "epoch": 2.8339660845355605, "grad_norm": 0.19121284782886505, "learning_rate": 1.152554234771741e-06, "loss": 0.4238, "step": 11197 }, { "epoch": 2.8342191850164515, "grad_norm": 0.18803685903549194, "learning_rate": 1.1514385508802917e-06, "loss": 0.4569, "step": 11198 }, { "epoch": 2.8344722854973425, "grad_norm": 0.18751080334186554, "learning_rate": 1.150323374258101e-06, "loss": 0.4632, "step": 11199 }, { "epoch": 2.834725385978233, "grad_norm": 0.19711115956306458, "learning_rate": 1.1492087049690959e-06, "loss": 0.478, "step": 11200 }, { "epoch": 2.834978486459124, "grad_norm": 0.18578407168388367, "learning_rate": 1.1480945430771838e-06, "loss": 0.4609, "step": 11201 }, { "epoch": 2.835231586940015, "grad_norm": 0.1854618340730667, "learning_rate": 1.1469808886462363e-06, "loss": 0.4419, "step": 11202 }, { "epoch": 2.835484687420906, "grad_norm": 0.18940775096416473, "learning_rate": 1.1458677417400932e-06, "loss": 0.4276, "step": 11203 }, { "epoch": 2.835737787901797, "grad_norm": 0.1899564117193222, "learning_rate": 1.1447551024225712e-06, "loss": 0.4481, "step": 11204 }, { "epoch": 2.835990888382688, "grad_norm": 0.19010066986083984, "learning_rate": 1.1436429707574537e-06, "loss": 0.4827, "step": 11205 }, { "epoch": 2.836243988863579, "grad_norm": 0.18529894948005676, "learning_rate": 1.1425313468084975e-06, "loss": 0.4351, "step": 11206 }, { "epoch": 2.8364970893444696, "grad_norm": 0.1900789737701416, "learning_rate": 1.1414202306394318e-06, "loss": 0.4704, "step": 11207 }, { "epoch": 2.8367501898253606, "grad_norm": 0.18918085098266602, "learning_rate": 1.1403096223139543e-06, "loss": 0.4764, "step": 11208 }, { "epoch": 2.8370032903062516, "grad_norm": 0.18953010439872742, "learning_rate": 1.139199521895732e-06, "loss": 0.4265, "step": 11209 }, { "epoch": 2.8372563907871426, "grad_norm": 0.1926405429840088, "learning_rate": 1.1380899294484037e-06, "loss": 0.4661, "step": 11210 }, { "epoch": 2.837509491268033, "grad_norm": 0.19582629203796387, "learning_rate": 1.1369808450355823e-06, "loss": 0.4598, "step": 11211 }, { "epoch": 2.837762591748924, "grad_norm": 0.18699826300144196, "learning_rate": 1.135872268720849e-06, "loss": 0.4605, "step": 11212 }, { "epoch": 2.838015692229815, "grad_norm": 0.184355691075325, "learning_rate": 1.134764200567755e-06, "loss": 0.4723, "step": 11213 }, { "epoch": 2.838268792710706, "grad_norm": 0.18941445648670197, "learning_rate": 1.1336566406398242e-06, "loss": 0.4396, "step": 11214 }, { "epoch": 2.838521893191597, "grad_norm": 0.18725211918354034, "learning_rate": 1.13254958900055e-06, "loss": 0.4497, "step": 11215 }, { "epoch": 2.838774993672488, "grad_norm": 0.1871422529220581, "learning_rate": 1.1314430457133985e-06, "loss": 0.4322, "step": 11216 }, { "epoch": 2.8390280941533788, "grad_norm": 0.188576340675354, "learning_rate": 1.1303370108418044e-06, "loss": 0.4403, "step": 11217 }, { "epoch": 2.8392811946342698, "grad_norm": 0.1913108080625534, "learning_rate": 1.1292314844491748e-06, "loss": 0.4403, "step": 11218 }, { "epoch": 2.8395342951151608, "grad_norm": 0.19176776707172394, "learning_rate": 1.1281264665988878e-06, "loss": 0.4738, "step": 11219 }, { "epoch": 2.8397873955960518, "grad_norm": 0.18426083028316498, "learning_rate": 1.1270219573542907e-06, "loss": 0.4448, "step": 11220 }, { "epoch": 2.8400404960769423, "grad_norm": 0.19981785118579865, "learning_rate": 1.1259179567787026e-06, "loss": 0.4468, "step": 11221 }, { "epoch": 2.8402935965578333, "grad_norm": 0.18944407999515533, "learning_rate": 1.1248144649354142e-06, "loss": 0.4681, "step": 11222 }, { "epoch": 2.8405466970387243, "grad_norm": 0.19085554778575897, "learning_rate": 1.123711481887686e-06, "loss": 0.4675, "step": 11223 }, { "epoch": 2.8407997975196153, "grad_norm": 0.18124854564666748, "learning_rate": 1.1226090076987495e-06, "loss": 0.4334, "step": 11224 }, { "epoch": 2.8410528980005063, "grad_norm": 0.19142770767211914, "learning_rate": 1.1215070424318086e-06, "loss": 0.4529, "step": 11225 }, { "epoch": 2.8413059984813973, "grad_norm": 0.19099362194538116, "learning_rate": 1.1204055861500352e-06, "loss": 0.46, "step": 11226 }, { "epoch": 2.841559098962288, "grad_norm": 0.19573131203651428, "learning_rate": 1.119304638916573e-06, "loss": 0.4301, "step": 11227 }, { "epoch": 2.841812199443179, "grad_norm": 0.19103093445301056, "learning_rate": 1.1182042007945382e-06, "loss": 0.4804, "step": 11228 }, { "epoch": 2.84206529992407, "grad_norm": 0.19650880992412567, "learning_rate": 1.117104271847016e-06, "loss": 0.4449, "step": 11229 }, { "epoch": 2.842318400404961, "grad_norm": 0.1872393935918808, "learning_rate": 1.1160048521370648e-06, "loss": 0.4639, "step": 11230 }, { "epoch": 2.8425715008858514, "grad_norm": 0.19048243761062622, "learning_rate": 1.1149059417277063e-06, "loss": 0.4539, "step": 11231 }, { "epoch": 2.8428246013667424, "grad_norm": 0.18526266515254974, "learning_rate": 1.1138075406819459e-06, "loss": 0.4565, "step": 11232 }, { "epoch": 2.8430777018476334, "grad_norm": 0.19024688005447388, "learning_rate": 1.1127096490627477e-06, "loss": 0.4442, "step": 11233 }, { "epoch": 2.8433308023285244, "grad_norm": 0.19155357778072357, "learning_rate": 1.1116122669330543e-06, "loss": 0.4645, "step": 11234 }, { "epoch": 2.8435839028094154, "grad_norm": 0.19122909009456635, "learning_rate": 1.110515394355778e-06, "loss": 0.4581, "step": 11235 }, { "epoch": 2.8438370032903064, "grad_norm": 0.18906773626804352, "learning_rate": 1.1094190313937937e-06, "loss": 0.4373, "step": 11236 }, { "epoch": 2.8440901037711974, "grad_norm": 0.1865253895521164, "learning_rate": 1.1083231781099578e-06, "loss": 0.4433, "step": 11237 }, { "epoch": 2.844343204252088, "grad_norm": 0.19035270810127258, "learning_rate": 1.1072278345670906e-06, "loss": 0.4634, "step": 11238 }, { "epoch": 2.844596304732979, "grad_norm": 0.24169014394283295, "learning_rate": 1.1061330008279903e-06, "loss": 0.4678, "step": 11239 }, { "epoch": 2.84484940521387, "grad_norm": 0.20311060547828674, "learning_rate": 1.105038676955419e-06, "loss": 0.4548, "step": 11240 }, { "epoch": 2.845102505694761, "grad_norm": 0.19746388494968414, "learning_rate": 1.1039448630121142e-06, "loss": 0.4594, "step": 11241 }, { "epoch": 2.8453556061756515, "grad_norm": 0.18708588182926178, "learning_rate": 1.102851559060777e-06, "loss": 0.4488, "step": 11242 }, { "epoch": 2.8456087066565425, "grad_norm": 0.18735232949256897, "learning_rate": 1.101758765164087e-06, "loss": 0.4741, "step": 11243 }, { "epoch": 2.8458618071374335, "grad_norm": 0.1893690973520279, "learning_rate": 1.1006664813846901e-06, "loss": 0.4647, "step": 11244 }, { "epoch": 2.8461149076183245, "grad_norm": 0.18988998234272003, "learning_rate": 1.099574707785208e-06, "loss": 0.4484, "step": 11245 }, { "epoch": 2.8463680080992155, "grad_norm": 0.1858222335577011, "learning_rate": 1.09848344442823e-06, "loss": 0.4411, "step": 11246 }, { "epoch": 2.8466211085801065, "grad_norm": 0.19007790088653564, "learning_rate": 1.097392691376311e-06, "loss": 0.4917, "step": 11247 }, { "epoch": 2.846874209060997, "grad_norm": 0.28911373019218445, "learning_rate": 1.096302448691986e-06, "loss": 0.4491, "step": 11248 }, { "epoch": 2.847127309541888, "grad_norm": 0.18857456743717194, "learning_rate": 1.0952127164377535e-06, "loss": 0.4385, "step": 11249 }, { "epoch": 2.847380410022779, "grad_norm": 0.1840876340866089, "learning_rate": 1.094123494676087e-06, "loss": 0.4675, "step": 11250 }, { "epoch": 2.84763351050367, "grad_norm": 0.18351218104362488, "learning_rate": 1.0930347834694278e-06, "loss": 0.443, "step": 11251 }, { "epoch": 2.8478866109845606, "grad_norm": 0.1835339516401291, "learning_rate": 1.0919465828801933e-06, "loss": 0.4355, "step": 11252 }, { "epoch": 2.8481397114654516, "grad_norm": 0.18617026507854462, "learning_rate": 1.090858892970764e-06, "loss": 0.4479, "step": 11253 }, { "epoch": 2.8483928119463426, "grad_norm": 0.19069848954677582, "learning_rate": 1.0897717138034958e-06, "loss": 0.4266, "step": 11254 }, { "epoch": 2.8486459124272336, "grad_norm": 0.18831118941307068, "learning_rate": 1.088685045440714e-06, "loss": 0.4774, "step": 11255 }, { "epoch": 2.8488990129081246, "grad_norm": 0.18973173201084137, "learning_rate": 1.0875988879447152e-06, "loss": 0.4882, "step": 11256 }, { "epoch": 2.8491521133890156, "grad_norm": 0.19665026664733887, "learning_rate": 1.086513241377768e-06, "loss": 0.4386, "step": 11257 }, { "epoch": 2.849405213869906, "grad_norm": 0.18376535177230835, "learning_rate": 1.0854281058021076e-06, "loss": 0.4543, "step": 11258 }, { "epoch": 2.849658314350797, "grad_norm": 0.1887301355600357, "learning_rate": 1.0843434812799446e-06, "loss": 0.4739, "step": 11259 }, { "epoch": 2.849911414831688, "grad_norm": 0.1871744990348816, "learning_rate": 1.0832593678734582e-06, "loss": 0.4544, "step": 11260 }, { "epoch": 2.850164515312579, "grad_norm": 0.18673090636730194, "learning_rate": 1.0821757656447973e-06, "loss": 0.4541, "step": 11261 }, { "epoch": 2.8504176157934697, "grad_norm": 0.1948419213294983, "learning_rate": 1.0810926746560823e-06, "loss": 0.4271, "step": 11262 }, { "epoch": 2.8506707162743607, "grad_norm": 0.1884956657886505, "learning_rate": 1.080010094969406e-06, "loss": 0.4333, "step": 11263 }, { "epoch": 2.8509238167552517, "grad_norm": 0.18279722332954407, "learning_rate": 1.0789280266468282e-06, "loss": 0.4452, "step": 11264 }, { "epoch": 2.8511769172361427, "grad_norm": 0.1822231411933899, "learning_rate": 1.0778464697503844e-06, "loss": 0.4379, "step": 11265 }, { "epoch": 2.8514300177170337, "grad_norm": 0.19145691394805908, "learning_rate": 1.0767654243420755e-06, "loss": 0.4928, "step": 11266 }, { "epoch": 2.8516831181979247, "grad_norm": 0.1837167888879776, "learning_rate": 1.0756848904838768e-06, "loss": 0.4434, "step": 11267 }, { "epoch": 2.8519362186788157, "grad_norm": 0.1899966448545456, "learning_rate": 1.0746048682377341e-06, "loss": 0.4502, "step": 11268 }, { "epoch": 2.8521893191597063, "grad_norm": 0.18412497639656067, "learning_rate": 1.073525357665558e-06, "loss": 0.4564, "step": 11269 }, { "epoch": 2.8524424196405973, "grad_norm": 0.19005087018013, "learning_rate": 1.0724463588292399e-06, "loss": 0.4506, "step": 11270 }, { "epoch": 2.8526955201214883, "grad_norm": 0.18152235448360443, "learning_rate": 1.071367871790634e-06, "loss": 0.4581, "step": 11271 }, { "epoch": 2.8529486206023793, "grad_norm": 0.18682026863098145, "learning_rate": 1.0702898966115683e-06, "loss": 0.4764, "step": 11272 }, { "epoch": 2.85320172108327, "grad_norm": 0.1878209114074707, "learning_rate": 1.0692124333538412e-06, "loss": 0.494, "step": 11273 }, { "epoch": 2.853454821564161, "grad_norm": 0.18807855248451233, "learning_rate": 1.0681354820792223e-06, "loss": 0.4467, "step": 11274 }, { "epoch": 2.853707922045052, "grad_norm": 0.18388357758522034, "learning_rate": 1.0670590428494466e-06, "loss": 0.4327, "step": 11275 }, { "epoch": 2.853961022525943, "grad_norm": 0.183041512966156, "learning_rate": 1.065983115726228e-06, "loss": 0.4585, "step": 11276 }, { "epoch": 2.854214123006834, "grad_norm": 0.18665127456188202, "learning_rate": 1.0649077007712472e-06, "loss": 0.4614, "step": 11277 }, { "epoch": 2.854467223487725, "grad_norm": 0.1849743127822876, "learning_rate": 1.0638327980461528e-06, "loss": 0.4473, "step": 11278 }, { "epoch": 2.8547203239686154, "grad_norm": 0.1883174180984497, "learning_rate": 1.0627584076125696e-06, "loss": 0.4553, "step": 11279 }, { "epoch": 2.8549734244495064, "grad_norm": 0.1848708689212799, "learning_rate": 1.061684529532091e-06, "loss": 0.4726, "step": 11280 }, { "epoch": 2.8552265249303974, "grad_norm": 0.18616949021816254, "learning_rate": 1.0606111638662752e-06, "loss": 0.4519, "step": 11281 }, { "epoch": 2.8554796254112884, "grad_norm": 0.1836688369512558, "learning_rate": 1.0595383106766565e-06, "loss": 0.4551, "step": 11282 }, { "epoch": 2.855732725892179, "grad_norm": 0.19071677327156067, "learning_rate": 1.0584659700247445e-06, "loss": 0.4896, "step": 11283 }, { "epoch": 2.85598582637307, "grad_norm": 0.1856207400560379, "learning_rate": 1.0573941419720112e-06, "loss": 0.4437, "step": 11284 }, { "epoch": 2.856238926853961, "grad_norm": 0.1941186785697937, "learning_rate": 1.056322826579903e-06, "loss": 0.4613, "step": 11285 }, { "epoch": 2.856492027334852, "grad_norm": 0.33970198035240173, "learning_rate": 1.0552520239098352e-06, "loss": 0.4573, "step": 11286 }, { "epoch": 2.856745127815743, "grad_norm": 0.19073429703712463, "learning_rate": 1.0541817340231941e-06, "loss": 0.4528, "step": 11287 }, { "epoch": 2.856998228296634, "grad_norm": 0.18557637929916382, "learning_rate": 1.0531119569813352e-06, "loss": 0.47, "step": 11288 }, { "epoch": 2.8572513287775245, "grad_norm": 0.19424781203269958, "learning_rate": 1.052042692845593e-06, "loss": 0.4569, "step": 11289 }, { "epoch": 2.8575044292584155, "grad_norm": 0.1868968904018402, "learning_rate": 1.0509739416772613e-06, "loss": 0.458, "step": 11290 }, { "epoch": 2.8577575297393065, "grad_norm": 0.1861393302679062, "learning_rate": 1.0499057035376126e-06, "loss": 0.4413, "step": 11291 }, { "epoch": 2.8580106302201975, "grad_norm": 0.1839505285024643, "learning_rate": 1.0488379784878822e-06, "loss": 0.4327, "step": 11292 }, { "epoch": 2.858263730701088, "grad_norm": 0.18774943053722382, "learning_rate": 1.0477707665892834e-06, "loss": 0.4652, "step": 11293 }, { "epoch": 2.858516831181979, "grad_norm": 0.18312804400920868, "learning_rate": 1.046704067902996e-06, "loss": 0.4799, "step": 11294 }, { "epoch": 2.85876993166287, "grad_norm": 0.18902724981307983, "learning_rate": 1.0456378824901714e-06, "loss": 0.4197, "step": 11295 }, { "epoch": 2.859023032143761, "grad_norm": 0.18725024163722992, "learning_rate": 1.044572210411936e-06, "loss": 0.4412, "step": 11296 }, { "epoch": 2.859276132624652, "grad_norm": 0.18890802562236786, "learning_rate": 1.0435070517293766e-06, "loss": 0.4571, "step": 11297 }, { "epoch": 2.859529233105543, "grad_norm": 0.19087421894073486, "learning_rate": 1.0424424065035599e-06, "loss": 0.4764, "step": 11298 }, { "epoch": 2.859782333586434, "grad_norm": 0.19555313885211945, "learning_rate": 1.0413782747955182e-06, "loss": 0.486, "step": 11299 }, { "epoch": 2.8600354340673246, "grad_norm": 0.18997813761234283, "learning_rate": 1.0403146566662569e-06, "loss": 0.4647, "step": 11300 }, { "epoch": 2.8602885345482156, "grad_norm": 0.18159820139408112, "learning_rate": 1.0392515521767488e-06, "loss": 0.4678, "step": 11301 }, { "epoch": 2.8605416350291066, "grad_norm": 0.18432019650936127, "learning_rate": 1.038188961387946e-06, "loss": 0.447, "step": 11302 }, { "epoch": 2.8607947355099976, "grad_norm": 0.18582512438297272, "learning_rate": 1.0371268843607563e-06, "loss": 0.4284, "step": 11303 }, { "epoch": 2.861047835990888, "grad_norm": 0.19400502741336823, "learning_rate": 1.0360653211560711e-06, "loss": 0.4373, "step": 11304 }, { "epoch": 2.861300936471779, "grad_norm": 0.19492584466934204, "learning_rate": 1.0350042718347463e-06, "loss": 0.4524, "step": 11305 }, { "epoch": 2.86155403695267, "grad_norm": 0.1855347454547882, "learning_rate": 1.033943736457609e-06, "loss": 0.4408, "step": 11306 }, { "epoch": 2.861807137433561, "grad_norm": 0.18534424901008606, "learning_rate": 1.0328837150854586e-06, "loss": 0.4504, "step": 11307 }, { "epoch": 2.862060237914452, "grad_norm": 0.187869131565094, "learning_rate": 1.031824207779063e-06, "loss": 0.4524, "step": 11308 }, { "epoch": 2.862313338395343, "grad_norm": 0.1945253312587738, "learning_rate": 1.030765214599162e-06, "loss": 0.4708, "step": 11309 }, { "epoch": 2.8625664388762337, "grad_norm": 0.1855369210243225, "learning_rate": 1.029706735606465e-06, "loss": 0.4607, "step": 11310 }, { "epoch": 2.8628195393571247, "grad_norm": 0.18619957566261292, "learning_rate": 1.0286487708616532e-06, "loss": 0.4639, "step": 11311 }, { "epoch": 2.8630726398380157, "grad_norm": 0.1927400529384613, "learning_rate": 1.0275913204253774e-06, "loss": 0.446, "step": 11312 }, { "epoch": 2.8633257403189067, "grad_norm": 0.18336187303066254, "learning_rate": 1.0265343843582575e-06, "loss": 0.4543, "step": 11313 }, { "epoch": 2.8635788407997973, "grad_norm": 0.1880456656217575, "learning_rate": 1.0254779627208867e-06, "loss": 0.4342, "step": 11314 }, { "epoch": 2.8638319412806883, "grad_norm": 1.0294222831726074, "learning_rate": 1.024422055573827e-06, "loss": 0.4846, "step": 11315 }, { "epoch": 2.8640850417615793, "grad_norm": 0.1829422265291214, "learning_rate": 1.0233666629776118e-06, "loss": 0.4388, "step": 11316 }, { "epoch": 2.8643381422424703, "grad_norm": 0.19055399298667908, "learning_rate": 1.0223117849927443e-06, "loss": 0.4593, "step": 11317 }, { "epoch": 2.8645912427233613, "grad_norm": 0.1853838562965393, "learning_rate": 1.0212574216797e-06, "loss": 0.4536, "step": 11318 }, { "epoch": 2.8648443432042523, "grad_norm": 0.22611944377422333, "learning_rate": 1.020203573098918e-06, "loss": 0.4594, "step": 11319 }, { "epoch": 2.865097443685143, "grad_norm": 0.1818734109401703, "learning_rate": 1.0191502393108189e-06, "loss": 0.4465, "step": 11320 }, { "epoch": 2.865350544166034, "grad_norm": 0.18507231771945953, "learning_rate": 1.0180974203757853e-06, "loss": 0.4615, "step": 11321 }, { "epoch": 2.865603644646925, "grad_norm": 0.18645022809505463, "learning_rate": 1.0170451163541751e-06, "loss": 0.4418, "step": 11322 }, { "epoch": 2.865856745127816, "grad_norm": 0.18615403771400452, "learning_rate": 1.0159933273063127e-06, "loss": 0.468, "step": 11323 }, { "epoch": 2.8661098456087064, "grad_norm": 0.1812462955713272, "learning_rate": 1.014942053292497e-06, "loss": 0.4265, "step": 11324 }, { "epoch": 2.8663629460895974, "grad_norm": 0.18932750821113586, "learning_rate": 1.0138912943729917e-06, "loss": 0.4486, "step": 11325 }, { "epoch": 2.8666160465704884, "grad_norm": 0.1851164549589157, "learning_rate": 1.0128410506080355e-06, "loss": 0.4483, "step": 11326 }, { "epoch": 2.8668691470513794, "grad_norm": 0.1875520944595337, "learning_rate": 1.0117913220578401e-06, "loss": 0.4464, "step": 11327 }, { "epoch": 2.8671222475322704, "grad_norm": 0.18472163379192352, "learning_rate": 1.010742108782582e-06, "loss": 0.4626, "step": 11328 }, { "epoch": 2.8673753480131614, "grad_norm": 0.19851745665073395, "learning_rate": 1.0096934108424116e-06, "loss": 0.4716, "step": 11329 }, { "epoch": 2.8676284484940524, "grad_norm": 0.1949947625398636, "learning_rate": 1.0086452282974457e-06, "loss": 0.4293, "step": 11330 }, { "epoch": 2.867881548974943, "grad_norm": 0.19203583896160126, "learning_rate": 1.007597561207776e-06, "loss": 0.4897, "step": 11331 }, { "epoch": 2.868134649455834, "grad_norm": 0.19553644955158234, "learning_rate": 1.006550409633461e-06, "loss": 0.4652, "step": 11332 }, { "epoch": 2.868387749936725, "grad_norm": 0.18654315173625946, "learning_rate": 1.0055037736345363e-06, "loss": 0.4359, "step": 11333 }, { "epoch": 2.8686408504176155, "grad_norm": 0.19663333892822266, "learning_rate": 1.0044576532710005e-06, "loss": 0.464, "step": 11334 }, { "epoch": 2.8688939508985065, "grad_norm": 0.19019608199596405, "learning_rate": 1.0034120486028275e-06, "loss": 0.4678, "step": 11335 }, { "epoch": 2.8691470513793975, "grad_norm": 0.1885492205619812, "learning_rate": 1.0023669596899554e-06, "loss": 0.4673, "step": 11336 }, { "epoch": 2.8694001518602885, "grad_norm": 0.19106952846050262, "learning_rate": 1.0013223865923004e-06, "loss": 0.4478, "step": 11337 }, { "epoch": 2.8696532523411795, "grad_norm": 0.1897558569908142, "learning_rate": 1.0002783293697437e-06, "loss": 0.4579, "step": 11338 }, { "epoch": 2.8699063528220705, "grad_norm": 0.18171373009681702, "learning_rate": 9.992347880821386e-07, "loss": 0.4435, "step": 11339 }, { "epoch": 2.8701594533029615, "grad_norm": 0.18989235162734985, "learning_rate": 9.981917627893113e-07, "loss": 0.4748, "step": 11340 }, { "epoch": 2.870412553783852, "grad_norm": 0.18593961000442505, "learning_rate": 9.971492535510574e-07, "loss": 0.4501, "step": 11341 }, { "epoch": 2.870665654264743, "grad_norm": 0.1872825175523758, "learning_rate": 9.961072604271383e-07, "loss": 0.4356, "step": 11342 }, { "epoch": 2.870918754745634, "grad_norm": 0.18828067183494568, "learning_rate": 9.950657834772892e-07, "loss": 0.4667, "step": 11343 }, { "epoch": 2.871171855226525, "grad_norm": 0.1856699436903, "learning_rate": 9.94024822761218e-07, "loss": 0.456, "step": 11344 }, { "epoch": 2.8714249557074156, "grad_norm": 0.19311068952083588, "learning_rate": 9.929843783385984e-07, "loss": 0.4433, "step": 11345 }, { "epoch": 2.8716780561883066, "grad_norm": 0.18790894746780396, "learning_rate": 9.91944450269081e-07, "loss": 0.4291, "step": 11346 }, { "epoch": 2.8719311566691976, "grad_norm": 0.18614967167377472, "learning_rate": 9.909050386122788e-07, "loss": 0.4547, "step": 11347 }, { "epoch": 2.8721842571500886, "grad_norm": 0.18759426474571228, "learning_rate": 9.898661434277802e-07, "loss": 0.4605, "step": 11348 }, { "epoch": 2.8724373576309796, "grad_norm": 0.1863824427127838, "learning_rate": 9.888277647751432e-07, "loss": 0.4482, "step": 11349 }, { "epoch": 2.8726904581118706, "grad_norm": 0.1885676383972168, "learning_rate": 9.87789902713895e-07, "loss": 0.4372, "step": 11350 }, { "epoch": 2.872943558592761, "grad_norm": 0.1880403459072113, "learning_rate": 9.867525573035342e-07, "loss": 0.4716, "step": 11351 }, { "epoch": 2.873196659073652, "grad_norm": 0.18897229433059692, "learning_rate": 9.857157286035313e-07, "loss": 0.4865, "step": 11352 }, { "epoch": 2.873449759554543, "grad_norm": 0.21190091967582703, "learning_rate": 9.846794166733232e-07, "loss": 0.428, "step": 11353 }, { "epoch": 2.873702860035434, "grad_norm": 0.18291716277599335, "learning_rate": 9.83643621572321e-07, "loss": 0.4611, "step": 11354 }, { "epoch": 2.8739559605163247, "grad_norm": 0.19498541951179504, "learning_rate": 9.826083433599032e-07, "loss": 0.4686, "step": 11355 }, { "epoch": 2.8742090609972157, "grad_norm": 0.19686724245548248, "learning_rate": 9.81573582095422e-07, "loss": 0.4303, "step": 11356 }, { "epoch": 2.8744621614781067, "grad_norm": 0.1861058622598648, "learning_rate": 9.805393378381967e-07, "loss": 0.4603, "step": 11357 }, { "epoch": 2.8747152619589977, "grad_norm": 0.19022703170776367, "learning_rate": 9.795056106475187e-07, "loss": 0.4335, "step": 11358 }, { "epoch": 2.8749683624398887, "grad_norm": 0.18963900208473206, "learning_rate": 9.784724005826497e-07, "loss": 0.4849, "step": 11359 }, { "epoch": 2.8752214629207797, "grad_norm": 0.1857539862394333, "learning_rate": 9.774397077028198e-07, "loss": 0.4558, "step": 11360 }, { "epoch": 2.8754745634016707, "grad_norm": 0.197219118475914, "learning_rate": 9.76407532067234e-07, "loss": 0.4511, "step": 11361 }, { "epoch": 2.8757276638825613, "grad_norm": 0.1851080358028412, "learning_rate": 9.753758737350616e-07, "loss": 0.4431, "step": 11362 }, { "epoch": 2.8759807643634523, "grad_norm": 0.19514136016368866, "learning_rate": 9.743447327654486e-07, "loss": 0.4622, "step": 11363 }, { "epoch": 2.8762338648443433, "grad_norm": 0.1960122138261795, "learning_rate": 9.733141092175059e-07, "loss": 0.4679, "step": 11364 }, { "epoch": 2.876486965325234, "grad_norm": 0.1970899999141693, "learning_rate": 9.722840031503167e-07, "loss": 0.4659, "step": 11365 }, { "epoch": 2.876740065806125, "grad_norm": 0.18392610549926758, "learning_rate": 9.712544146229363e-07, "loss": 0.4481, "step": 11366 }, { "epoch": 2.876993166287016, "grad_norm": 0.18767188489437103, "learning_rate": 9.702253436943887e-07, "loss": 0.4459, "step": 11367 }, { "epoch": 2.877246266767907, "grad_norm": 0.19899873435497284, "learning_rate": 9.691967904236687e-07, "loss": 0.447, "step": 11368 }, { "epoch": 2.877499367248798, "grad_norm": 0.18756243586540222, "learning_rate": 9.681687548697384e-07, "loss": 0.4422, "step": 11369 }, { "epoch": 2.877752467729689, "grad_norm": 0.19105264544487, "learning_rate": 9.671412370915333e-07, "loss": 0.4528, "step": 11370 }, { "epoch": 2.87800556821058, "grad_norm": 0.1873147040605545, "learning_rate": 9.661142371479614e-07, "loss": 0.4355, "step": 11371 }, { "epoch": 2.8782586686914704, "grad_norm": 0.1844426691532135, "learning_rate": 9.650877550978976e-07, "loss": 0.4272, "step": 11372 }, { "epoch": 2.8785117691723614, "grad_norm": 0.1857215315103531, "learning_rate": 9.640617910001883e-07, "loss": 0.4682, "step": 11373 }, { "epoch": 2.8787648696532524, "grad_norm": 0.18649473786354065, "learning_rate": 9.630363449136504e-07, "loss": 0.4745, "step": 11374 }, { "epoch": 2.8790179701341434, "grad_norm": 0.1855965554714203, "learning_rate": 9.620114168970674e-07, "loss": 0.4496, "step": 11375 }, { "epoch": 2.879271070615034, "grad_norm": 0.19244985282421112, "learning_rate": 9.609870070091954e-07, "loss": 0.4789, "step": 11376 }, { "epoch": 2.879524171095925, "grad_norm": 0.1901489794254303, "learning_rate": 9.599631153087675e-07, "loss": 0.4732, "step": 11377 }, { "epoch": 2.879777271576816, "grad_norm": 0.18311719596385956, "learning_rate": 9.589397418544766e-07, "loss": 0.4445, "step": 11378 }, { "epoch": 2.880030372057707, "grad_norm": 0.18841668963432312, "learning_rate": 9.579168867049949e-07, "loss": 0.458, "step": 11379 }, { "epoch": 2.880283472538598, "grad_norm": 0.1865057349205017, "learning_rate": 9.568945499189563e-07, "loss": 0.4744, "step": 11380 }, { "epoch": 2.880536573019489, "grad_norm": 0.1886470764875412, "learning_rate": 9.558727315549697e-07, "loss": 0.4582, "step": 11381 }, { "epoch": 2.8807896735003795, "grad_norm": 0.19130177795886993, "learning_rate": 9.54851431671614e-07, "loss": 0.4525, "step": 11382 }, { "epoch": 2.8810427739812705, "grad_norm": 0.19028723239898682, "learning_rate": 9.538306503274374e-07, "loss": 0.5062, "step": 11383 }, { "epoch": 2.8812958744621615, "grad_norm": 0.18763017654418945, "learning_rate": 9.528103875809636e-07, "loss": 0.4512, "step": 11384 }, { "epoch": 2.8815489749430525, "grad_norm": 0.1856323480606079, "learning_rate": 9.517906434906798e-07, "loss": 0.4169, "step": 11385 }, { "epoch": 2.881802075423943, "grad_norm": 0.1875871866941452, "learning_rate": 9.507714181150429e-07, "loss": 0.4577, "step": 11386 }, { "epoch": 2.882055175904834, "grad_norm": 0.1853805035352707, "learning_rate": 9.49752711512485e-07, "loss": 0.4453, "step": 11387 }, { "epoch": 2.882308276385725, "grad_norm": 0.18014316260814667, "learning_rate": 9.48734523741407e-07, "loss": 0.4765, "step": 11388 }, { "epoch": 2.882561376866616, "grad_norm": 0.18685030937194824, "learning_rate": 9.477168548601778e-07, "loss": 0.4487, "step": 11389 }, { "epoch": 2.882814477347507, "grad_norm": 0.18749333918094635, "learning_rate": 9.466997049271398e-07, "loss": 0.4643, "step": 11390 }, { "epoch": 2.883067577828398, "grad_norm": 0.1846848726272583, "learning_rate": 9.45683074000604e-07, "loss": 0.462, "step": 11391 }, { "epoch": 2.883320678309289, "grad_norm": 0.22226600348949432, "learning_rate": 9.446669621388505e-07, "loss": 0.4668, "step": 11392 }, { "epoch": 2.8835737787901796, "grad_norm": 0.19299037754535675, "learning_rate": 9.436513694001314e-07, "loss": 0.4338, "step": 11393 }, { "epoch": 2.8838268792710706, "grad_norm": 0.18866948783397675, "learning_rate": 9.426362958426694e-07, "loss": 0.4852, "step": 11394 }, { "epoch": 2.8840799797519616, "grad_norm": 0.18655550479888916, "learning_rate": 9.416217415246554e-07, "loss": 0.4418, "step": 11395 }, { "epoch": 2.884333080232852, "grad_norm": 0.18968671560287476, "learning_rate": 9.406077065042507e-07, "loss": 0.4479, "step": 11396 }, { "epoch": 2.884586180713743, "grad_norm": 0.1807897537946701, "learning_rate": 9.3959419083959e-07, "loss": 0.4331, "step": 11397 }, { "epoch": 2.884839281194634, "grad_norm": 0.19611749053001404, "learning_rate": 9.385811945887757e-07, "loss": 0.4451, "step": 11398 }, { "epoch": 2.885092381675525, "grad_norm": 0.18596495687961578, "learning_rate": 9.375687178098792e-07, "loss": 0.4551, "step": 11399 }, { "epoch": 2.885345482156416, "grad_norm": 0.18500235676765442, "learning_rate": 9.365567605609449e-07, "loss": 0.4343, "step": 11400 }, { "epoch": 2.885598582637307, "grad_norm": 0.1908453404903412, "learning_rate": 9.355453228999844e-07, "loss": 0.4326, "step": 11401 }, { "epoch": 2.885851683118198, "grad_norm": 0.1874249130487442, "learning_rate": 9.345344048849847e-07, "loss": 0.45, "step": 11402 }, { "epoch": 2.8861047835990887, "grad_norm": 0.19824542105197906, "learning_rate": 9.335240065738959e-07, "loss": 0.4726, "step": 11403 }, { "epoch": 2.8863578840799797, "grad_norm": 0.19239181280136108, "learning_rate": 9.325141280246442e-07, "loss": 0.4644, "step": 11404 }, { "epoch": 2.8866109845608707, "grad_norm": 0.18672895431518555, "learning_rate": 9.315047692951229e-07, "loss": 0.4491, "step": 11405 }, { "epoch": 2.8868640850417617, "grad_norm": 0.21867966651916504, "learning_rate": 9.304959304431971e-07, "loss": 0.4543, "step": 11406 }, { "epoch": 2.8871171855226523, "grad_norm": 0.1889195740222931, "learning_rate": 9.294876115267015e-07, "loss": 0.4379, "step": 11407 }, { "epoch": 2.8873702860035433, "grad_norm": 0.18809905648231506, "learning_rate": 9.284798126034389e-07, "loss": 0.424, "step": 11408 }, { "epoch": 2.8876233864844343, "grad_norm": 0.18754686415195465, "learning_rate": 9.274725337311874e-07, "loss": 0.4648, "step": 11409 }, { "epoch": 2.8878764869653253, "grad_norm": 0.1838075965642929, "learning_rate": 9.264657749676897e-07, "loss": 0.4484, "step": 11410 }, { "epoch": 2.8881295874462163, "grad_norm": 0.1909538060426712, "learning_rate": 9.25459536370662e-07, "loss": 0.4788, "step": 11411 }, { "epoch": 2.8883826879271073, "grad_norm": 0.1894971877336502, "learning_rate": 9.244538179977891e-07, "loss": 0.4413, "step": 11412 }, { "epoch": 2.888635788407998, "grad_norm": 0.21177974343299866, "learning_rate": 9.234486199067306e-07, "loss": 0.4577, "step": 11413 }, { "epoch": 2.888888888888889, "grad_norm": 0.1892675757408142, "learning_rate": 9.224439421551045e-07, "loss": 0.4625, "step": 11414 }, { "epoch": 2.88914198936978, "grad_norm": 0.18725815415382385, "learning_rate": 9.21439784800513e-07, "loss": 0.4389, "step": 11415 }, { "epoch": 2.889395089850671, "grad_norm": 0.1887408196926117, "learning_rate": 9.20436147900522e-07, "loss": 0.4576, "step": 11416 }, { "epoch": 2.8896481903315614, "grad_norm": 0.18355780839920044, "learning_rate": 9.194330315126654e-07, "loss": 0.4349, "step": 11417 }, { "epoch": 2.8899012908124524, "grad_norm": 0.187166228890419, "learning_rate": 9.18430435694454e-07, "loss": 0.4582, "step": 11418 }, { "epoch": 2.8901543912933434, "grad_norm": 0.1900191754102707, "learning_rate": 9.174283605033585e-07, "loss": 0.4436, "step": 11419 }, { "epoch": 2.8904074917742344, "grad_norm": 0.21760816872119904, "learning_rate": 9.164268059968295e-07, "loss": 0.4818, "step": 11420 }, { "epoch": 2.8906605922551254, "grad_norm": 0.18502919375896454, "learning_rate": 9.154257722322813e-07, "loss": 0.4591, "step": 11421 }, { "epoch": 2.8909136927360164, "grad_norm": 0.19096919894218445, "learning_rate": 9.144252592671043e-07, "loss": 0.4651, "step": 11422 }, { "epoch": 2.891166793216907, "grad_norm": 0.1873212307691574, "learning_rate": 9.134252671586552e-07, "loss": 0.459, "step": 11423 }, { "epoch": 2.891419893697798, "grad_norm": 0.19554178416728973, "learning_rate": 9.124257959642624e-07, "loss": 0.4371, "step": 11424 }, { "epoch": 2.891672994178689, "grad_norm": 0.18110394477844238, "learning_rate": 9.1142684574122e-07, "loss": 0.4431, "step": 11425 }, { "epoch": 2.89192609465958, "grad_norm": 0.18307985365390778, "learning_rate": 9.104284165467969e-07, "loss": 0.451, "step": 11426 }, { "epoch": 2.8921791951404705, "grad_norm": 0.18971692025661469, "learning_rate": 9.094305084382304e-07, "loss": 0.4459, "step": 11427 }, { "epoch": 2.8924322956213615, "grad_norm": 0.18916799128055573, "learning_rate": 9.084331214727304e-07, "loss": 0.4555, "step": 11428 }, { "epoch": 2.8926853961022525, "grad_norm": 0.18556852638721466, "learning_rate": 9.074362557074768e-07, "loss": 0.4579, "step": 11429 }, { "epoch": 2.8929384965831435, "grad_norm": 0.1920868158340454, "learning_rate": 9.064399111996136e-07, "loss": 0.4546, "step": 11430 }, { "epoch": 2.8931915970640345, "grad_norm": 0.18772701919078827, "learning_rate": 9.054440880062598e-07, "loss": 0.4386, "step": 11431 }, { "epoch": 2.8934446975449255, "grad_norm": 0.1941813826560974, "learning_rate": 9.044487861845053e-07, "loss": 0.4355, "step": 11432 }, { "epoch": 2.8936977980258165, "grad_norm": 0.1914607435464859, "learning_rate": 9.034540057914076e-07, "loss": 0.4425, "step": 11433 }, { "epoch": 2.893950898506707, "grad_norm": 0.19147229194641113, "learning_rate": 9.024597468839935e-07, "loss": 0.4341, "step": 11434 }, { "epoch": 2.894203998987598, "grad_norm": 0.18649452924728394, "learning_rate": 9.014660095192685e-07, "loss": 0.468, "step": 11435 }, { "epoch": 2.894457099468489, "grad_norm": 0.18898487091064453, "learning_rate": 9.004727937541946e-07, "loss": 0.4428, "step": 11436 }, { "epoch": 2.89471019994938, "grad_norm": 0.18728406727313995, "learning_rate": 8.994800996457132e-07, "loss": 0.4468, "step": 11437 }, { "epoch": 2.8949633004302706, "grad_norm": 0.18815727531909943, "learning_rate": 8.984879272507341e-07, "loss": 0.4982, "step": 11438 }, { "epoch": 2.8952164009111616, "grad_norm": 0.18528646230697632, "learning_rate": 8.974962766261342e-07, "loss": 0.4455, "step": 11439 }, { "epoch": 2.8954695013920526, "grad_norm": 0.1895037442445755, "learning_rate": 8.965051478287657e-07, "loss": 0.4809, "step": 11440 }, { "epoch": 2.8957226018729436, "grad_norm": 0.20989878475666046, "learning_rate": 8.955145409154453e-07, "loss": 0.4459, "step": 11441 }, { "epoch": 2.8959757023538346, "grad_norm": 0.18722276389598846, "learning_rate": 8.945244559429633e-07, "loss": 0.4365, "step": 11442 }, { "epoch": 2.8962288028347256, "grad_norm": 0.18793514370918274, "learning_rate": 8.935348929680798e-07, "loss": 0.4579, "step": 11443 }, { "epoch": 2.896481903315616, "grad_norm": 0.18384623527526855, "learning_rate": 8.925458520475239e-07, "loss": 0.4428, "step": 11444 }, { "epoch": 2.896735003796507, "grad_norm": 0.18964087963104248, "learning_rate": 8.915573332379945e-07, "loss": 0.4616, "step": 11445 }, { "epoch": 2.896988104277398, "grad_norm": 0.18994006514549255, "learning_rate": 8.905693365961632e-07, "loss": 0.4486, "step": 11446 }, { "epoch": 2.897241204758289, "grad_norm": 0.20266419649124146, "learning_rate": 8.895818621786678e-07, "loss": 0.4484, "step": 11447 }, { "epoch": 2.8974943052391797, "grad_norm": 0.18833115696907043, "learning_rate": 8.885949100421187e-07, "loss": 0.4635, "step": 11448 }, { "epoch": 2.8977474057200707, "grad_norm": 0.17824071645736694, "learning_rate": 8.876084802430962e-07, "loss": 0.4451, "step": 11449 }, { "epoch": 2.8980005062009617, "grad_norm": 0.26733192801475525, "learning_rate": 8.866225728381494e-07, "loss": 0.4391, "step": 11450 }, { "epoch": 2.8982536066818527, "grad_norm": 0.18760061264038086, "learning_rate": 8.85637187883801e-07, "loss": 0.4439, "step": 11451 }, { "epoch": 2.8985067071627437, "grad_norm": 0.19528444111347198, "learning_rate": 8.846523254365347e-07, "loss": 0.4418, "step": 11452 }, { "epoch": 2.8987598076436347, "grad_norm": 0.18190515041351318, "learning_rate": 8.836679855528174e-07, "loss": 0.4659, "step": 11453 }, { "epoch": 2.8990129081245253, "grad_norm": 0.19104227423667908, "learning_rate": 8.826841682890763e-07, "loss": 0.4525, "step": 11454 }, { "epoch": 2.8992660086054163, "grad_norm": 0.19468532502651215, "learning_rate": 8.817008737017119e-07, "loss": 0.4811, "step": 11455 }, { "epoch": 2.8995191090863073, "grad_norm": 0.18951186537742615, "learning_rate": 8.807181018470934e-07, "loss": 0.4441, "step": 11456 }, { "epoch": 2.8997722095671983, "grad_norm": 0.18863226473331451, "learning_rate": 8.797358527815647e-07, "loss": 0.469, "step": 11457 }, { "epoch": 2.900025310048089, "grad_norm": 0.201004296541214, "learning_rate": 8.787541265614286e-07, "loss": 0.459, "step": 11458 }, { "epoch": 2.90027841052898, "grad_norm": 0.18460841476917267, "learning_rate": 8.777729232429732e-07, "loss": 0.4494, "step": 11459 }, { "epoch": 2.900531511009871, "grad_norm": 0.18831636011600494, "learning_rate": 8.767922428824449e-07, "loss": 0.4492, "step": 11460 }, { "epoch": 2.900784611490762, "grad_norm": 0.19176878035068512, "learning_rate": 8.758120855360641e-07, "loss": 0.4382, "step": 11461 }, { "epoch": 2.901037711971653, "grad_norm": 0.18511153757572174, "learning_rate": 8.748324512600225e-07, "loss": 0.4524, "step": 11462 }, { "epoch": 2.901290812452544, "grad_norm": 0.18384723365306854, "learning_rate": 8.738533401104821e-07, "loss": 0.4486, "step": 11463 }, { "epoch": 2.901543912933435, "grad_norm": 0.19962939620018005, "learning_rate": 8.728747521435687e-07, "loss": 0.4656, "step": 11464 }, { "epoch": 2.9017970134143254, "grad_norm": 0.1883750557899475, "learning_rate": 8.718966874153834e-07, "loss": 0.4594, "step": 11465 }, { "epoch": 2.9020501138952164, "grad_norm": 0.18591472506523132, "learning_rate": 8.709191459819987e-07, "loss": 0.4444, "step": 11466 }, { "epoch": 2.9023032143761074, "grad_norm": 0.18368418514728546, "learning_rate": 8.699421278994558e-07, "loss": 0.4901, "step": 11467 }, { "epoch": 2.9025563148569984, "grad_norm": 0.19114165008068085, "learning_rate": 8.689656332237651e-07, "loss": 0.4624, "step": 11468 }, { "epoch": 2.902809415337889, "grad_norm": 0.18737070262432098, "learning_rate": 8.67989662010903e-07, "loss": 0.4945, "step": 11469 }, { "epoch": 2.90306251581878, "grad_norm": 0.18988536298274994, "learning_rate": 8.670142143168225e-07, "loss": 0.4426, "step": 11470 }, { "epoch": 2.903315616299671, "grad_norm": 0.18640422821044922, "learning_rate": 8.660392901974424e-07, "loss": 0.4317, "step": 11471 }, { "epoch": 2.903568716780562, "grad_norm": 0.19245070219039917, "learning_rate": 8.650648897086567e-07, "loss": 0.4818, "step": 11472 }, { "epoch": 2.903821817261453, "grad_norm": 0.19164621829986572, "learning_rate": 8.640910129063229e-07, "loss": 0.4595, "step": 11473 }, { "epoch": 2.904074917742344, "grad_norm": 0.19098590314388275, "learning_rate": 8.631176598462732e-07, "loss": 0.478, "step": 11474 }, { "epoch": 2.9043280182232345, "grad_norm": 0.19429972767829895, "learning_rate": 8.62144830584305e-07, "loss": 0.4522, "step": 11475 }, { "epoch": 2.9045811187041255, "grad_norm": 0.1877809762954712, "learning_rate": 8.611725251761905e-07, "loss": 0.4501, "step": 11476 }, { "epoch": 2.9048342191850165, "grad_norm": 0.19391171634197235, "learning_rate": 8.602007436776693e-07, "loss": 0.4684, "step": 11477 }, { "epoch": 2.9050873196659075, "grad_norm": 0.19259698688983917, "learning_rate": 8.592294861444495e-07, "loss": 0.4719, "step": 11478 }, { "epoch": 2.905340420146798, "grad_norm": 0.19138993322849274, "learning_rate": 8.582587526322172e-07, "loss": 0.4658, "step": 11479 }, { "epoch": 2.905593520627689, "grad_norm": 0.18958836793899536, "learning_rate": 8.572885431966172e-07, "loss": 0.436, "step": 11480 }, { "epoch": 2.90584662110858, "grad_norm": 0.18532656133174896, "learning_rate": 8.563188578932713e-07, "loss": 0.4559, "step": 11481 }, { "epoch": 2.906099721589471, "grad_norm": 0.18547600507736206, "learning_rate": 8.553496967777697e-07, "loss": 0.4298, "step": 11482 }, { "epoch": 2.906352822070362, "grad_norm": 0.1840965896844864, "learning_rate": 8.543810599056712e-07, "loss": 0.4517, "step": 11483 }, { "epoch": 2.906605922551253, "grad_norm": 0.1843889206647873, "learning_rate": 8.534129473325048e-07, "loss": 0.4427, "step": 11484 }, { "epoch": 2.9068590230321436, "grad_norm": 0.19074317812919617, "learning_rate": 8.524453591137749e-07, "loss": 0.4641, "step": 11485 }, { "epoch": 2.9071121235130346, "grad_norm": 0.19387897849082947, "learning_rate": 8.514782953049472e-07, "loss": 0.4591, "step": 11486 }, { "epoch": 2.9073652239939256, "grad_norm": 0.18518313765525818, "learning_rate": 8.505117559614629e-07, "loss": 0.4408, "step": 11487 }, { "epoch": 2.9076183244748166, "grad_norm": 0.19417309761047363, "learning_rate": 8.495457411387309e-07, "loss": 0.4528, "step": 11488 }, { "epoch": 2.907871424955707, "grad_norm": 0.21271325647830963, "learning_rate": 8.485802508921315e-07, "loss": 0.4737, "step": 11489 }, { "epoch": 2.908124525436598, "grad_norm": 0.19921858608722687, "learning_rate": 8.476152852770137e-07, "loss": 0.4671, "step": 11490 }, { "epoch": 2.908377625917489, "grad_norm": 0.19447512924671173, "learning_rate": 8.466508443486976e-07, "loss": 0.4689, "step": 11491 }, { "epoch": 2.90863072639838, "grad_norm": 0.18991701304912567, "learning_rate": 8.456869281624713e-07, "loss": 0.4598, "step": 11492 }, { "epoch": 2.908883826879271, "grad_norm": 0.18759757280349731, "learning_rate": 8.447235367735951e-07, "loss": 0.4536, "step": 11493 }, { "epoch": 2.909136927360162, "grad_norm": 0.18637849390506744, "learning_rate": 8.43760670237298e-07, "loss": 0.4388, "step": 11494 }, { "epoch": 2.909390027841053, "grad_norm": 0.18594779074192047, "learning_rate": 8.427983286087793e-07, "loss": 0.4567, "step": 11495 }, { "epoch": 2.9096431283219437, "grad_norm": 0.1853564828634262, "learning_rate": 8.418365119432082e-07, "loss": 0.4796, "step": 11496 }, { "epoch": 2.9098962288028347, "grad_norm": 0.1905858814716339, "learning_rate": 8.408752202957227e-07, "loss": 0.4632, "step": 11497 }, { "epoch": 2.9101493292837257, "grad_norm": 0.19035740196704865, "learning_rate": 8.399144537214321e-07, "loss": 0.4545, "step": 11498 }, { "epoch": 2.9104024297646167, "grad_norm": 0.18974578380584717, "learning_rate": 8.389542122754157e-07, "loss": 0.4424, "step": 11499 }, { "epoch": 2.9106555302455073, "grad_norm": 0.18837252259254456, "learning_rate": 8.379944960127206e-07, "loss": 0.4458, "step": 11500 }, { "epoch": 2.9109086307263983, "grad_norm": 0.21242563426494598, "learning_rate": 8.370353049883684e-07, "loss": 0.4546, "step": 11501 }, { "epoch": 2.9111617312072893, "grad_norm": 0.1809251606464386, "learning_rate": 8.360766392573416e-07, "loss": 0.4759, "step": 11502 }, { "epoch": 2.9114148316881803, "grad_norm": 0.19475863873958588, "learning_rate": 8.351184988746041e-07, "loss": 0.4452, "step": 11503 }, { "epoch": 2.9116679321690713, "grad_norm": 0.18788599967956543, "learning_rate": 8.341608838950821e-07, "loss": 0.455, "step": 11504 }, { "epoch": 2.9119210326499623, "grad_norm": 0.18511195480823517, "learning_rate": 8.332037943736726e-07, "loss": 0.4428, "step": 11505 }, { "epoch": 2.912174133130853, "grad_norm": 0.18679960072040558, "learning_rate": 8.322472303652463e-07, "loss": 0.4292, "step": 11506 }, { "epoch": 2.912427233611744, "grad_norm": 0.18501415848731995, "learning_rate": 8.312911919246402e-07, "loss": 0.4341, "step": 11507 }, { "epoch": 2.912680334092635, "grad_norm": 0.1923796683549881, "learning_rate": 8.303356791066586e-07, "loss": 0.4598, "step": 11508 }, { "epoch": 2.912933434573526, "grad_norm": 0.18911531567573547, "learning_rate": 8.293806919660808e-07, "loss": 0.4667, "step": 11509 }, { "epoch": 2.9131865350544164, "grad_norm": 0.1883600354194641, "learning_rate": 8.284262305576563e-07, "loss": 0.469, "step": 11510 }, { "epoch": 2.9134396355353074, "grad_norm": 0.18978504836559296, "learning_rate": 8.274722949361003e-07, "loss": 0.4603, "step": 11511 }, { "epoch": 2.9136927360161984, "grad_norm": 0.18698538839817047, "learning_rate": 8.265188851561024e-07, "loss": 0.4724, "step": 11512 }, { "epoch": 2.9139458364970894, "grad_norm": 0.19391992688179016, "learning_rate": 8.255660012723165e-07, "loss": 0.453, "step": 11513 }, { "epoch": 2.9141989369779804, "grad_norm": 0.18893200159072876, "learning_rate": 8.246136433393704e-07, "loss": 0.4727, "step": 11514 }, { "epoch": 2.9144520374588714, "grad_norm": 0.193956196308136, "learning_rate": 8.236618114118589e-07, "loss": 0.4456, "step": 11515 }, { "epoch": 2.914705137939762, "grad_norm": 0.1950560361146927, "learning_rate": 8.227105055443518e-07, "loss": 0.4658, "step": 11516 }, { "epoch": 2.914958238420653, "grad_norm": 0.19444338977336884, "learning_rate": 8.217597257913845e-07, "loss": 0.455, "step": 11517 }, { "epoch": 2.915211338901544, "grad_norm": 0.19137582182884216, "learning_rate": 8.208094722074644e-07, "loss": 0.4638, "step": 11518 }, { "epoch": 2.915464439382435, "grad_norm": 0.19990718364715576, "learning_rate": 8.198597448470647e-07, "loss": 0.4716, "step": 11519 }, { "epoch": 2.9157175398633255, "grad_norm": 0.19529102742671967, "learning_rate": 8.189105437646328e-07, "loss": 0.4683, "step": 11520 }, { "epoch": 2.9159706403442165, "grad_norm": 0.40622150897979736, "learning_rate": 8.179618690145829e-07, "loss": 0.4534, "step": 11521 }, { "epoch": 2.9162237408251075, "grad_norm": 0.18712490797042847, "learning_rate": 8.170137206513007e-07, "loss": 0.4521, "step": 11522 }, { "epoch": 2.9164768413059985, "grad_norm": 0.18841886520385742, "learning_rate": 8.160660987291458e-07, "loss": 0.4363, "step": 11523 }, { "epoch": 2.9167299417868895, "grad_norm": 0.18464155495166779, "learning_rate": 8.151190033024392e-07, "loss": 0.4671, "step": 11524 }, { "epoch": 2.9169830422677805, "grad_norm": 0.18608303368091583, "learning_rate": 8.141724344254754e-07, "loss": 0.4508, "step": 11525 }, { "epoch": 2.9172361427486715, "grad_norm": 0.2302398830652237, "learning_rate": 8.132263921525219e-07, "loss": 0.4413, "step": 11526 }, { "epoch": 2.917489243229562, "grad_norm": 0.1920173168182373, "learning_rate": 8.12280876537811e-07, "loss": 0.4718, "step": 11527 }, { "epoch": 2.917742343710453, "grad_norm": 0.1903320848941803, "learning_rate": 8.11335887635547e-07, "loss": 0.4625, "step": 11528 }, { "epoch": 2.917995444191344, "grad_norm": 0.1846081167459488, "learning_rate": 8.103914254999079e-07, "loss": 0.4485, "step": 11529 }, { "epoch": 2.9182485446722346, "grad_norm": 0.1992633044719696, "learning_rate": 8.094474901850335e-07, "loss": 0.438, "step": 11530 }, { "epoch": 2.9185016451531256, "grad_norm": 0.1873290240764618, "learning_rate": 8.085040817450395e-07, "loss": 0.4412, "step": 11531 }, { "epoch": 2.9187547456340166, "grad_norm": 0.18964138627052307, "learning_rate": 8.075612002340094e-07, "loss": 0.4221, "step": 11532 }, { "epoch": 2.9190078461149076, "grad_norm": 0.1912303864955902, "learning_rate": 8.066188457059954e-07, "loss": 0.4871, "step": 11533 }, { "epoch": 2.9192609465957986, "grad_norm": 0.18671868741512299, "learning_rate": 8.05677018215022e-07, "loss": 0.4478, "step": 11534 }, { "epoch": 2.9195140470766896, "grad_norm": 0.19052696228027344, "learning_rate": 8.047357178150817e-07, "loss": 0.4918, "step": 11535 }, { "epoch": 2.9197671475575806, "grad_norm": 0.18643230199813843, "learning_rate": 8.037949445601368e-07, "loss": 0.4467, "step": 11536 }, { "epoch": 2.920020248038471, "grad_norm": 0.18921436369419098, "learning_rate": 8.028546985041208e-07, "loss": 0.4771, "step": 11537 }, { "epoch": 2.920273348519362, "grad_norm": 0.18821635842323303, "learning_rate": 8.019149797009362e-07, "loss": 0.4474, "step": 11538 }, { "epoch": 2.920526449000253, "grad_norm": 0.19139644503593445, "learning_rate": 8.009757882044533e-07, "loss": 0.457, "step": 11539 }, { "epoch": 2.920779549481144, "grad_norm": 0.19052854180335999, "learning_rate": 8.000371240685156e-07, "loss": 0.4789, "step": 11540 }, { "epoch": 2.9210326499620347, "grad_norm": 0.18788741528987885, "learning_rate": 7.990989873469346e-07, "loss": 0.4713, "step": 11541 }, { "epoch": 2.9212857504429257, "grad_norm": 0.1910671591758728, "learning_rate": 7.981613780934904e-07, "loss": 0.4309, "step": 11542 }, { "epoch": 2.9215388509238167, "grad_norm": 0.22638647258281708, "learning_rate": 7.972242963619359e-07, "loss": 0.4433, "step": 11543 }, { "epoch": 2.9217919514047077, "grad_norm": 0.23083166778087616, "learning_rate": 7.962877422059911e-07, "loss": 0.4399, "step": 11544 }, { "epoch": 2.9220450518855987, "grad_norm": 0.197757750749588, "learning_rate": 7.953517156793467e-07, "loss": 0.4733, "step": 11545 }, { "epoch": 2.9222981523664897, "grad_norm": 0.18694747984409332, "learning_rate": 7.944162168356628e-07, "loss": 0.4764, "step": 11546 }, { "epoch": 2.9225512528473803, "grad_norm": 0.19008180499076843, "learning_rate": 7.934812457285701e-07, "loss": 0.4714, "step": 11547 }, { "epoch": 2.9228043533282713, "grad_norm": 0.19918501377105713, "learning_rate": 7.925468024116678e-07, "loss": 0.459, "step": 11548 }, { "epoch": 2.9230574538091623, "grad_norm": 0.18684175610542297, "learning_rate": 7.916128869385265e-07, "loss": 0.4384, "step": 11549 }, { "epoch": 2.9233105542900533, "grad_norm": 0.1829555630683899, "learning_rate": 7.906794993626854e-07, "loss": 0.4394, "step": 11550 }, { "epoch": 2.923563654770944, "grad_norm": 0.1878608912229538, "learning_rate": 7.897466397376541e-07, "loss": 0.4424, "step": 11551 }, { "epoch": 2.923816755251835, "grad_norm": 0.18801149725914001, "learning_rate": 7.888143081169109e-07, "loss": 0.4522, "step": 11552 }, { "epoch": 2.924069855732726, "grad_norm": 0.18772512674331665, "learning_rate": 7.878825045539007e-07, "loss": 0.4675, "step": 11553 }, { "epoch": 2.924322956213617, "grad_norm": 0.1895466148853302, "learning_rate": 7.869512291020476e-07, "loss": 0.4747, "step": 11554 }, { "epoch": 2.924576056694508, "grad_norm": 0.19443535804748535, "learning_rate": 7.860204818147376e-07, "loss": 0.4567, "step": 11555 }, { "epoch": 2.924829157175399, "grad_norm": 0.18350765109062195, "learning_rate": 7.850902627453283e-07, "loss": 0.4772, "step": 11556 }, { "epoch": 2.92508225765629, "grad_norm": 0.18158818781375885, "learning_rate": 7.84160571947149e-07, "loss": 0.4561, "step": 11557 }, { "epoch": 2.9253353581371804, "grad_norm": 0.19191624224185944, "learning_rate": 7.832314094734938e-07, "loss": 0.4513, "step": 11558 }, { "epoch": 2.9255884586180714, "grad_norm": 0.18897582590579987, "learning_rate": 7.823027753776291e-07, "loss": 0.4492, "step": 11559 }, { "epoch": 2.9258415590989624, "grad_norm": 0.1958928108215332, "learning_rate": 7.813746697127944e-07, "loss": 0.4625, "step": 11560 }, { "epoch": 2.926094659579853, "grad_norm": 0.1892162412405014, "learning_rate": 7.804470925321949e-07, "loss": 0.4728, "step": 11561 }, { "epoch": 2.926347760060744, "grad_norm": 0.1842205971479416, "learning_rate": 7.795200438890083e-07, "loss": 0.4632, "step": 11562 }, { "epoch": 2.926600860541635, "grad_norm": 0.18580564856529236, "learning_rate": 7.785935238363773e-07, "loss": 0.4427, "step": 11563 }, { "epoch": 2.926853961022526, "grad_norm": 0.1888464093208313, "learning_rate": 7.776675324274186e-07, "loss": 0.4516, "step": 11564 }, { "epoch": 2.927107061503417, "grad_norm": 0.18581174314022064, "learning_rate": 7.767420697152184e-07, "loss": 0.4384, "step": 11565 }, { "epoch": 2.927360161984308, "grad_norm": 0.19062526524066925, "learning_rate": 7.758171357528277e-07, "loss": 0.4574, "step": 11566 }, { "epoch": 2.927613262465199, "grad_norm": 0.18487827479839325, "learning_rate": 7.748927305932763e-07, "loss": 0.4565, "step": 11567 }, { "epoch": 2.9278663629460895, "grad_norm": 0.1883862167596817, "learning_rate": 7.739688542895574e-07, "loss": 0.4417, "step": 11568 }, { "epoch": 2.9281194634269805, "grad_norm": 0.18705368041992188, "learning_rate": 7.730455068946308e-07, "loss": 0.4618, "step": 11569 }, { "epoch": 2.9283725639078715, "grad_norm": 0.18416815996170044, "learning_rate": 7.721226884614341e-07, "loss": 0.4408, "step": 11570 }, { "epoch": 2.9286256643887625, "grad_norm": 0.18993251025676727, "learning_rate": 7.712003990428684e-07, "loss": 0.4301, "step": 11571 }, { "epoch": 2.928878764869653, "grad_norm": 0.1929721236228943, "learning_rate": 7.70278638691807e-07, "loss": 0.4419, "step": 11572 }, { "epoch": 2.929131865350544, "grad_norm": 0.19064363837242126, "learning_rate": 7.693574074610933e-07, "loss": 0.452, "step": 11573 }, { "epoch": 2.929384965831435, "grad_norm": 0.18883639574050903, "learning_rate": 7.684367054035391e-07, "loss": 0.4511, "step": 11574 }, { "epoch": 2.929638066312326, "grad_norm": 0.1905648112297058, "learning_rate": 7.675165325719259e-07, "loss": 0.4285, "step": 11575 }, { "epoch": 2.929891166793217, "grad_norm": 0.18774305284023285, "learning_rate": 7.665968890190057e-07, "loss": 0.4671, "step": 11576 }, { "epoch": 2.930144267274108, "grad_norm": 0.187969371676445, "learning_rate": 7.656777747974987e-07, "loss": 0.4511, "step": 11577 }, { "epoch": 2.9303973677549986, "grad_norm": 0.1974993199110031, "learning_rate": 7.647591899600981e-07, "loss": 0.4769, "step": 11578 }, { "epoch": 2.9306504682358896, "grad_norm": 0.19333867728710175, "learning_rate": 7.638411345594621e-07, "loss": 0.4563, "step": 11579 }, { "epoch": 2.9309035687167806, "grad_norm": 0.18524613976478577, "learning_rate": 7.629236086482217e-07, "loss": 0.4645, "step": 11580 }, { "epoch": 2.9311566691976716, "grad_norm": 0.18810632824897766, "learning_rate": 7.620066122789771e-07, "loss": 0.4759, "step": 11581 }, { "epoch": 2.931409769678562, "grad_norm": 0.18563677370548248, "learning_rate": 7.610901455042962e-07, "loss": 0.4485, "step": 11582 }, { "epoch": 2.931662870159453, "grad_norm": 0.18909762799739838, "learning_rate": 7.601742083767205e-07, "loss": 0.4466, "step": 11583 }, { "epoch": 2.931915970640344, "grad_norm": 0.18671482801437378, "learning_rate": 7.592588009487578e-07, "loss": 0.4415, "step": 11584 }, { "epoch": 2.932169071121235, "grad_norm": 0.19036835432052612, "learning_rate": 7.583439232728851e-07, "loss": 0.4444, "step": 11585 }, { "epoch": 2.932422171602126, "grad_norm": 0.1864355206489563, "learning_rate": 7.574295754015526e-07, "loss": 0.433, "step": 11586 }, { "epoch": 2.932675272083017, "grad_norm": 0.18962347507476807, "learning_rate": 7.565157573871762e-07, "loss": 0.4577, "step": 11587 }, { "epoch": 2.932928372563908, "grad_norm": 0.18923375010490417, "learning_rate": 7.556024692821428e-07, "loss": 0.4575, "step": 11588 }, { "epoch": 2.9331814730447987, "grad_norm": 0.19076083600521088, "learning_rate": 7.546897111388119e-07, "loss": 0.4407, "step": 11589 }, { "epoch": 2.9334345735256897, "grad_norm": 0.1921551525592804, "learning_rate": 7.537774830095079e-07, "loss": 0.4662, "step": 11590 }, { "epoch": 2.9336876740065807, "grad_norm": 0.18799364566802979, "learning_rate": 7.52865784946527e-07, "loss": 0.4867, "step": 11591 }, { "epoch": 2.9339407744874713, "grad_norm": 0.1905508190393448, "learning_rate": 7.519546170021352e-07, "loss": 0.4743, "step": 11592 }, { "epoch": 2.9341938749683623, "grad_norm": 0.18667995929718018, "learning_rate": 7.510439792285674e-07, "loss": 0.4629, "step": 11593 }, { "epoch": 2.9344469754492533, "grad_norm": 0.18660208582878113, "learning_rate": 7.501338716780293e-07, "loss": 0.4814, "step": 11594 }, { "epoch": 2.9347000759301443, "grad_norm": 0.18079917132854462, "learning_rate": 7.49224294402695e-07, "loss": 0.4248, "step": 11595 }, { "epoch": 2.9349531764110353, "grad_norm": 0.1893569827079773, "learning_rate": 7.483152474547118e-07, "loss": 0.453, "step": 11596 }, { "epoch": 2.9352062768919263, "grad_norm": 0.17847071588039398, "learning_rate": 7.474067308861854e-07, "loss": 0.425, "step": 11597 }, { "epoch": 2.9354593773728173, "grad_norm": 0.18455547094345093, "learning_rate": 7.464987447492066e-07, "loss": 0.4704, "step": 11598 }, { "epoch": 2.935712477853708, "grad_norm": 0.20727179944515228, "learning_rate": 7.45591289095825e-07, "loss": 0.4619, "step": 11599 }, { "epoch": 2.935965578334599, "grad_norm": 0.19140459597110748, "learning_rate": 7.446843639780644e-07, "loss": 0.4966, "step": 11600 }, { "epoch": 2.93621867881549, "grad_norm": 0.185679093003273, "learning_rate": 7.437779694479185e-07, "loss": 0.4552, "step": 11601 }, { "epoch": 2.936471779296381, "grad_norm": 0.18805812299251556, "learning_rate": 7.428721055573451e-07, "loss": 0.4951, "step": 11602 }, { "epoch": 2.9367248797772714, "grad_norm": 0.1934053748846054, "learning_rate": 7.419667723582768e-07, "loss": 0.4584, "step": 11603 }, { "epoch": 2.9369779802581624, "grad_norm": 0.18384668231010437, "learning_rate": 7.410619699026145e-07, "loss": 0.4777, "step": 11604 }, { "epoch": 2.9372310807390534, "grad_norm": 0.18996360898017883, "learning_rate": 7.401576982422298e-07, "loss": 0.4718, "step": 11605 }, { "epoch": 2.9374841812199444, "grad_norm": 0.18705083429813385, "learning_rate": 7.392539574289614e-07, "loss": 0.4578, "step": 11606 }, { "epoch": 2.9377372817008354, "grad_norm": 0.1858365535736084, "learning_rate": 7.383507475146212e-07, "loss": 0.4323, "step": 11607 }, { "epoch": 2.9379903821817264, "grad_norm": 0.2636418640613556, "learning_rate": 7.374480685509855e-07, "loss": 0.458, "step": 11608 }, { "epoch": 2.938243482662617, "grad_norm": 0.21301311254501343, "learning_rate": 7.365459205898029e-07, "loss": 0.4475, "step": 11609 }, { "epoch": 2.938496583143508, "grad_norm": 0.1860761195421219, "learning_rate": 7.356443036827921e-07, "loss": 0.4692, "step": 11610 }, { "epoch": 2.938749683624399, "grad_norm": 0.1805121898651123, "learning_rate": 7.347432178816417e-07, "loss": 0.4734, "step": 11611 }, { "epoch": 2.93900278410529, "grad_norm": 0.18948477506637573, "learning_rate": 7.338426632380113e-07, "loss": 0.4476, "step": 11612 }, { "epoch": 2.9392558845861805, "grad_norm": 0.19300508499145508, "learning_rate": 7.329426398035233e-07, "loss": 0.4555, "step": 11613 }, { "epoch": 2.9395089850670715, "grad_norm": 0.1907675862312317, "learning_rate": 7.320431476297763e-07, "loss": 0.4604, "step": 11614 }, { "epoch": 2.9397620855479625, "grad_norm": 0.19558900594711304, "learning_rate": 7.311441867683367e-07, "loss": 0.4761, "step": 11615 }, { "epoch": 2.9400151860288535, "grad_norm": 0.1869809925556183, "learning_rate": 7.302457572707378e-07, "loss": 0.4528, "step": 11616 }, { "epoch": 2.9402682865097445, "grad_norm": 0.18826086819171906, "learning_rate": 7.293478591884861e-07, "loss": 0.4722, "step": 11617 }, { "epoch": 2.9405213869906355, "grad_norm": 0.1907854825258255, "learning_rate": 7.284504925730584e-07, "loss": 0.4463, "step": 11618 }, { "epoch": 2.940774487471526, "grad_norm": 0.18426628410816193, "learning_rate": 7.275536574758957e-07, "loss": 0.4406, "step": 11619 }, { "epoch": 2.941027587952417, "grad_norm": 0.20400741696357727, "learning_rate": 7.266573539484134e-07, "loss": 0.458, "step": 11620 }, { "epoch": 2.941280688433308, "grad_norm": 0.19139613211154938, "learning_rate": 7.257615820419928e-07, "loss": 0.4537, "step": 11621 }, { "epoch": 2.941533788914199, "grad_norm": 0.18646414577960968, "learning_rate": 7.248663418079882e-07, "loss": 0.4385, "step": 11622 }, { "epoch": 2.9417868893950896, "grad_norm": 0.18568362295627594, "learning_rate": 7.23971633297722e-07, "loss": 0.4577, "step": 11623 }, { "epoch": 2.9420399898759806, "grad_norm": 0.190677672624588, "learning_rate": 7.230774565624843e-07, "loss": 0.4743, "step": 11624 }, { "epoch": 2.9422930903568716, "grad_norm": 0.19094318151474, "learning_rate": 7.221838116535384e-07, "loss": 0.449, "step": 11625 }, { "epoch": 2.9425461908377626, "grad_norm": 0.196162611246109, "learning_rate": 7.212906986221135e-07, "loss": 0.4594, "step": 11626 }, { "epoch": 2.9427992913186536, "grad_norm": 0.19636952877044678, "learning_rate": 7.203981175194097e-07, "loss": 0.4676, "step": 11627 }, { "epoch": 2.9430523917995446, "grad_norm": 0.19362714886665344, "learning_rate": 7.195060683965993e-07, "loss": 0.4591, "step": 11628 }, { "epoch": 2.9433054922804356, "grad_norm": 0.1864919364452362, "learning_rate": 7.186145513048182e-07, "loss": 0.4515, "step": 11629 }, { "epoch": 2.943558592761326, "grad_norm": 0.19103768467903137, "learning_rate": 7.177235662951776e-07, "loss": 0.4595, "step": 11630 }, { "epoch": 2.943811693242217, "grad_norm": 0.1869928389787674, "learning_rate": 7.168331134187545e-07, "loss": 0.4535, "step": 11631 }, { "epoch": 2.944064793723108, "grad_norm": 0.18728037178516388, "learning_rate": 7.159431927265981e-07, "loss": 0.4511, "step": 11632 }, { "epoch": 2.944317894203999, "grad_norm": 0.19585949182510376, "learning_rate": 7.150538042697242e-07, "loss": 0.4647, "step": 11633 }, { "epoch": 2.9445709946848897, "grad_norm": 0.18154625594615936, "learning_rate": 7.14164948099122e-07, "loss": 0.4285, "step": 11634 }, { "epoch": 2.9448240951657807, "grad_norm": 0.18900839984416962, "learning_rate": 7.132766242657419e-07, "loss": 0.4891, "step": 11635 }, { "epoch": 2.9450771956466717, "grad_norm": 0.18824106454849243, "learning_rate": 7.123888328205153e-07, "loss": 0.4439, "step": 11636 }, { "epoch": 2.9453302961275627, "grad_norm": 0.1807323694229126, "learning_rate": 7.115015738143361e-07, "loss": 0.4512, "step": 11637 }, { "epoch": 2.9455833966084537, "grad_norm": 0.1909935176372528, "learning_rate": 7.106148472980678e-07, "loss": 0.4406, "step": 11638 }, { "epoch": 2.9458364970893447, "grad_norm": 0.19548077881336212, "learning_rate": 7.097286533225456e-07, "loss": 0.4439, "step": 11639 }, { "epoch": 2.9460895975702353, "grad_norm": 0.1853741705417633, "learning_rate": 7.088429919385742e-07, "loss": 0.4331, "step": 11640 }, { "epoch": 2.9463426980511263, "grad_norm": 0.19067293405532837, "learning_rate": 7.079578631969219e-07, "loss": 0.4575, "step": 11641 }, { "epoch": 2.9465957985320173, "grad_norm": 0.18649311363697052, "learning_rate": 7.070732671483372e-07, "loss": 0.4849, "step": 11642 }, { "epoch": 2.9468488990129083, "grad_norm": 0.18159906566143036, "learning_rate": 7.061892038435292e-07, "loss": 0.4612, "step": 11643 }, { "epoch": 2.947101999493799, "grad_norm": 0.19093169271945953, "learning_rate": 7.053056733331787e-07, "loss": 0.4736, "step": 11644 }, { "epoch": 2.94735509997469, "grad_norm": 0.19763562083244324, "learning_rate": 7.044226756679406e-07, "loss": 0.4643, "step": 11645 }, { "epoch": 2.947608200455581, "grad_norm": 0.18652766942977905, "learning_rate": 7.035402108984301e-07, "loss": 0.4587, "step": 11646 }, { "epoch": 2.947861300936472, "grad_norm": 0.1869211196899414, "learning_rate": 7.02658279075239e-07, "loss": 0.4644, "step": 11647 }, { "epoch": 2.948114401417363, "grad_norm": 0.18556688725948334, "learning_rate": 7.017768802489255e-07, "loss": 0.4439, "step": 11648 }, { "epoch": 2.948367501898254, "grad_norm": 0.1857711225748062, "learning_rate": 7.008960144700216e-07, "loss": 0.4799, "step": 11649 }, { "epoch": 2.9486206023791444, "grad_norm": 0.19167271256446838, "learning_rate": 7.000156817890236e-07, "loss": 0.4482, "step": 11650 }, { "epoch": 2.9488737028600354, "grad_norm": 0.1945657581090927, "learning_rate": 6.991358822563999e-07, "loss": 0.4637, "step": 11651 }, { "epoch": 2.9491268033409264, "grad_norm": 0.18932831287384033, "learning_rate": 6.982566159225856e-07, "loss": 0.4356, "step": 11652 }, { "epoch": 2.9493799038218174, "grad_norm": 0.1865207850933075, "learning_rate": 6.973778828379874e-07, "loss": 0.4681, "step": 11653 }, { "epoch": 2.949633004302708, "grad_norm": 0.18976818025112152, "learning_rate": 6.964996830529813e-07, "loss": 0.4644, "step": 11654 }, { "epoch": 2.949886104783599, "grad_norm": 0.18625977635383606, "learning_rate": 6.956220166179151e-07, "loss": 0.4782, "step": 11655 }, { "epoch": 2.95013920526449, "grad_norm": 0.18964163959026337, "learning_rate": 6.947448835831017e-07, "loss": 0.466, "step": 11656 }, { "epoch": 2.950392305745381, "grad_norm": 0.18916375935077667, "learning_rate": 6.938682839988264e-07, "loss": 0.4455, "step": 11657 }, { "epoch": 2.950645406226272, "grad_norm": 0.18642979860305786, "learning_rate": 6.929922179153403e-07, "loss": 0.443, "step": 11658 }, { "epoch": 2.950898506707163, "grad_norm": 0.19822372496128082, "learning_rate": 6.921166853828687e-07, "loss": 0.4652, "step": 11659 }, { "epoch": 2.951151607188054, "grad_norm": 0.18140508234500885, "learning_rate": 6.912416864516025e-07, "loss": 0.4506, "step": 11660 }, { "epoch": 2.9514047076689445, "grad_norm": 0.18640242516994476, "learning_rate": 6.903672211717038e-07, "loss": 0.446, "step": 11661 }, { "epoch": 2.9516578081498355, "grad_norm": 0.1832420378923416, "learning_rate": 6.89493289593306e-07, "loss": 0.4558, "step": 11662 }, { "epoch": 2.9519109086307265, "grad_norm": 0.1878858059644699, "learning_rate": 6.886198917665077e-07, "loss": 0.457, "step": 11663 }, { "epoch": 2.9521640091116175, "grad_norm": 0.18698805570602417, "learning_rate": 6.877470277413789e-07, "loss": 0.4706, "step": 11664 }, { "epoch": 2.952417109592508, "grad_norm": 0.18544527888298035, "learning_rate": 6.868746975679585e-07, "loss": 0.4493, "step": 11665 }, { "epoch": 2.952670210073399, "grad_norm": 0.18883757293224335, "learning_rate": 6.860029012962577e-07, "loss": 0.4632, "step": 11666 }, { "epoch": 2.95292331055429, "grad_norm": 0.21968208253383636, "learning_rate": 6.851316389762508e-07, "loss": 0.4569, "step": 11667 }, { "epoch": 2.953176411035181, "grad_norm": 0.19100706279277802, "learning_rate": 6.842609106578913e-07, "loss": 0.4637, "step": 11668 }, { "epoch": 2.953429511516072, "grad_norm": 0.2097778469324112, "learning_rate": 6.833907163910913e-07, "loss": 0.4632, "step": 11669 }, { "epoch": 2.953682611996963, "grad_norm": 0.18554669618606567, "learning_rate": 6.825210562257389e-07, "loss": 0.4712, "step": 11670 }, { "epoch": 2.9539357124778536, "grad_norm": 0.18479089438915253, "learning_rate": 6.816519302116897e-07, "loss": 0.4533, "step": 11671 }, { "epoch": 2.9541888129587446, "grad_norm": 0.1873941868543625, "learning_rate": 6.807833383987694e-07, "loss": 0.4367, "step": 11672 }, { "epoch": 2.9544419134396356, "grad_norm": 0.18840470910072327, "learning_rate": 6.799152808367715e-07, "loss": 0.4802, "step": 11673 }, { "epoch": 2.9546950139205266, "grad_norm": 0.19704148173332214, "learning_rate": 6.790477575754606e-07, "loss": 0.4831, "step": 11674 }, { "epoch": 2.954948114401417, "grad_norm": 0.1902862787246704, "learning_rate": 6.781807686645692e-07, "loss": 0.4823, "step": 11675 }, { "epoch": 2.955201214882308, "grad_norm": 0.1883612424135208, "learning_rate": 6.773143141538007e-07, "loss": 0.4344, "step": 11676 }, { "epoch": 2.955454315363199, "grad_norm": 0.1830553561449051, "learning_rate": 6.764483940928268e-07, "loss": 0.4669, "step": 11677 }, { "epoch": 2.95570741584409, "grad_norm": 0.18950699269771576, "learning_rate": 6.755830085312886e-07, "loss": 0.4646, "step": 11678 }, { "epoch": 2.955960516324981, "grad_norm": 0.2045084834098816, "learning_rate": 6.747181575187978e-07, "loss": 0.4597, "step": 11679 }, { "epoch": 2.956213616805872, "grad_norm": 0.19076178967952728, "learning_rate": 6.738538411049334e-07, "loss": 0.4381, "step": 11680 }, { "epoch": 2.9564667172867627, "grad_norm": 0.1912718415260315, "learning_rate": 6.729900593392458e-07, "loss": 0.4751, "step": 11681 }, { "epoch": 2.9567198177676537, "grad_norm": 0.1870332807302475, "learning_rate": 6.721268122712532e-07, "loss": 0.4677, "step": 11682 }, { "epoch": 2.9569729182485447, "grad_norm": 0.19420701265335083, "learning_rate": 6.712640999504438e-07, "loss": 0.4503, "step": 11683 }, { "epoch": 2.9572260187294357, "grad_norm": 0.1839200258255005, "learning_rate": 6.704019224262758e-07, "loss": 0.4446, "step": 11684 }, { "epoch": 2.9574791192103262, "grad_norm": 0.19524402916431427, "learning_rate": 6.695402797481732e-07, "loss": 0.4445, "step": 11685 }, { "epoch": 2.9577322196912172, "grad_norm": 0.1843196451663971, "learning_rate": 6.68679171965535e-07, "loss": 0.4572, "step": 11686 }, { "epoch": 2.9579853201721082, "grad_norm": 0.18790656328201294, "learning_rate": 6.678185991277253e-07, "loss": 0.4443, "step": 11687 }, { "epoch": 2.9582384206529992, "grad_norm": 0.18682771921157837, "learning_rate": 6.669585612840801e-07, "loss": 0.4434, "step": 11688 }, { "epoch": 2.9584915211338902, "grad_norm": 0.18903575837612152, "learning_rate": 6.660990584839033e-07, "loss": 0.453, "step": 11689 }, { "epoch": 2.9587446216147812, "grad_norm": 0.18971297144889832, "learning_rate": 6.6524009077647e-07, "loss": 0.4611, "step": 11690 }, { "epoch": 2.9589977220956722, "grad_norm": 0.1835726797580719, "learning_rate": 6.643816582110186e-07, "loss": 0.4434, "step": 11691 }, { "epoch": 2.959250822576563, "grad_norm": 0.18050336837768555, "learning_rate": 6.635237608367617e-07, "loss": 0.4662, "step": 11692 }, { "epoch": 2.959503923057454, "grad_norm": 0.18825647234916687, "learning_rate": 6.626663987028848e-07, "loss": 0.4873, "step": 11693 }, { "epoch": 2.959757023538345, "grad_norm": 0.1903613805770874, "learning_rate": 6.618095718585371e-07, "loss": 0.4475, "step": 11694 }, { "epoch": 2.960010124019236, "grad_norm": 0.18788281083106995, "learning_rate": 6.609532803528385e-07, "loss": 0.4708, "step": 11695 }, { "epoch": 2.9602632245001264, "grad_norm": 0.18697459995746613, "learning_rate": 6.600975242348773e-07, "loss": 0.4378, "step": 11696 }, { "epoch": 2.9605163249810174, "grad_norm": 0.18231557309627533, "learning_rate": 6.592423035537121e-07, "loss": 0.4266, "step": 11697 }, { "epoch": 2.9607694254619084, "grad_norm": 0.1832209676504135, "learning_rate": 6.583876183583704e-07, "loss": 0.4282, "step": 11698 }, { "epoch": 2.9610225259427994, "grad_norm": 0.18421714007854462, "learning_rate": 6.575334686978518e-07, "loss": 0.4824, "step": 11699 }, { "epoch": 2.9612756264236904, "grad_norm": 0.18886402249336243, "learning_rate": 6.566798546211228e-07, "loss": 0.4574, "step": 11700 }, { "epoch": 2.9615287269045814, "grad_norm": 0.19001056253910065, "learning_rate": 6.558267761771187e-07, "loss": 0.4353, "step": 11701 }, { "epoch": 2.961781827385472, "grad_norm": 0.19590547680854797, "learning_rate": 6.549742334147435e-07, "loss": 0.4499, "step": 11702 }, { "epoch": 2.962034927866363, "grad_norm": 0.18829494714736938, "learning_rate": 6.541222263828728e-07, "loss": 0.4437, "step": 11703 }, { "epoch": 2.962288028347254, "grad_norm": 0.186531662940979, "learning_rate": 6.532707551303486e-07, "loss": 0.4704, "step": 11704 }, { "epoch": 2.962541128828145, "grad_norm": 0.18551725149154663, "learning_rate": 6.52419819705985e-07, "loss": 0.448, "step": 11705 }, { "epoch": 2.9627942293090355, "grad_norm": 0.18782514333724976, "learning_rate": 6.515694201585676e-07, "loss": 0.4606, "step": 11706 }, { "epoch": 2.9630473297899265, "grad_norm": 0.1921602338552475, "learning_rate": 6.507195565368429e-07, "loss": 0.4684, "step": 11707 }, { "epoch": 2.9633004302708175, "grad_norm": 0.19237901270389557, "learning_rate": 6.498702288895342e-07, "loss": 0.4249, "step": 11708 }, { "epoch": 2.9635535307517085, "grad_norm": 0.19046133756637573, "learning_rate": 6.490214372653314e-07, "loss": 0.4542, "step": 11709 }, { "epoch": 2.9638066312325995, "grad_norm": 0.18913787603378296, "learning_rate": 6.481731817128933e-07, "loss": 0.4813, "step": 11710 }, { "epoch": 2.9640597317134905, "grad_norm": 0.1847163587808609, "learning_rate": 6.4732546228085e-07, "loss": 0.4417, "step": 11711 }, { "epoch": 2.964312832194381, "grad_norm": 0.1943233460187912, "learning_rate": 6.464782790177981e-07, "loss": 0.4909, "step": 11712 }, { "epoch": 2.964565932675272, "grad_norm": 0.18896809220314026, "learning_rate": 6.456316319723066e-07, "loss": 0.4655, "step": 11713 }, { "epoch": 2.964819033156163, "grad_norm": 0.18761999905109406, "learning_rate": 6.447855211929099e-07, "loss": 0.4625, "step": 11714 }, { "epoch": 2.965072133637054, "grad_norm": 0.18706490099430084, "learning_rate": 6.439399467281138e-07, "loss": 0.4671, "step": 11715 }, { "epoch": 2.9653252341179446, "grad_norm": 0.18789027631282806, "learning_rate": 6.430949086263949e-07, "loss": 0.4467, "step": 11716 }, { "epoch": 2.9655783345988356, "grad_norm": 0.18931688368320465, "learning_rate": 6.422504069361968e-07, "loss": 0.4666, "step": 11717 }, { "epoch": 2.9658314350797266, "grad_norm": 0.18241700530052185, "learning_rate": 6.414064417059329e-07, "loss": 0.4578, "step": 11718 }, { "epoch": 2.9660845355606176, "grad_norm": 0.19307011365890503, "learning_rate": 6.405630129839857e-07, "loss": 0.4795, "step": 11719 }, { "epoch": 2.9663376360415086, "grad_norm": 0.19311004877090454, "learning_rate": 6.397201208187065e-07, "loss": 0.4657, "step": 11720 }, { "epoch": 2.9665907365223996, "grad_norm": 0.18852753937244415, "learning_rate": 6.388777652584177e-07, "loss": 0.4717, "step": 11721 }, { "epoch": 2.9668438370032906, "grad_norm": 0.1862674206495285, "learning_rate": 6.380359463514096e-07, "loss": 0.4618, "step": 11722 }, { "epoch": 2.967096937484181, "grad_norm": 0.19087187945842743, "learning_rate": 6.371946641459425e-07, "loss": 0.475, "step": 11723 }, { "epoch": 2.967350037965072, "grad_norm": 0.19053782522678375, "learning_rate": 6.363539186902434e-07, "loss": 0.4415, "step": 11724 }, { "epoch": 2.967603138445963, "grad_norm": 0.1875046342611313, "learning_rate": 6.355137100325115e-07, "loss": 0.4465, "step": 11725 }, { "epoch": 2.9678562389268537, "grad_norm": 0.19229218363761902, "learning_rate": 6.346740382209149e-07, "loss": 0.4835, "step": 11726 }, { "epoch": 2.9681093394077447, "grad_norm": 0.19317340850830078, "learning_rate": 6.338349033035896e-07, "loss": 0.4487, "step": 11727 }, { "epoch": 2.9683624398886357, "grad_norm": 0.1874748021364212, "learning_rate": 6.329963053286414e-07, "loss": 0.4326, "step": 11728 }, { "epoch": 2.9686155403695267, "grad_norm": 0.19287648797035217, "learning_rate": 6.321582443441443e-07, "loss": 0.4646, "step": 11729 }, { "epoch": 2.9688686408504177, "grad_norm": 0.1889987736940384, "learning_rate": 6.313207203981442e-07, "loss": 0.4631, "step": 11730 }, { "epoch": 2.9691217413313087, "grad_norm": 0.18341542780399323, "learning_rate": 6.304837335386548e-07, "loss": 0.4373, "step": 11731 }, { "epoch": 2.9693748418121997, "grad_norm": 0.19036398828029633, "learning_rate": 6.29647283813657e-07, "loss": 0.4434, "step": 11732 }, { "epoch": 2.9696279422930902, "grad_norm": 0.18411314487457275, "learning_rate": 6.288113712711041e-07, "loss": 0.4636, "step": 11733 }, { "epoch": 2.9698810427739812, "grad_norm": 0.18122410774230957, "learning_rate": 6.279759959589182e-07, "loss": 0.4468, "step": 11734 }, { "epoch": 2.9701341432548722, "grad_norm": 0.18396757543087006, "learning_rate": 6.271411579249853e-07, "loss": 0.464, "step": 11735 }, { "epoch": 2.9703872437357632, "grad_norm": 0.19501356780529022, "learning_rate": 6.263068572171682e-07, "loss": 0.4724, "step": 11736 }, { "epoch": 2.970640344216654, "grad_norm": 0.18982267379760742, "learning_rate": 6.254730938832954e-07, "loss": 0.4549, "step": 11737 }, { "epoch": 2.970893444697545, "grad_norm": 0.1948448270559311, "learning_rate": 6.24639867971164e-07, "loss": 0.49, "step": 11738 }, { "epoch": 2.971146545178436, "grad_norm": 0.1916022151708603, "learning_rate": 6.238071795285428e-07, "loss": 0.4603, "step": 11739 }, { "epoch": 2.971399645659327, "grad_norm": 0.1901421695947647, "learning_rate": 6.229750286031678e-07, "loss": 0.44, "step": 11740 }, { "epoch": 2.971652746140218, "grad_norm": 0.186466246843338, "learning_rate": 6.221434152427419e-07, "loss": 0.4581, "step": 11741 }, { "epoch": 2.971905846621109, "grad_norm": 0.1860732138156891, "learning_rate": 6.213123394949416e-07, "loss": 0.4705, "step": 11742 }, { "epoch": 2.9721589471019993, "grad_norm": 0.1816946566104889, "learning_rate": 6.204818014074087e-07, "loss": 0.4467, "step": 11743 }, { "epoch": 2.9724120475828903, "grad_norm": 0.18982113897800446, "learning_rate": 6.196518010277586e-07, "loss": 0.437, "step": 11744 }, { "epoch": 2.9726651480637813, "grad_norm": 0.18964141607284546, "learning_rate": 6.188223384035752e-07, "loss": 0.4729, "step": 11745 }, { "epoch": 2.9729182485446723, "grad_norm": 0.1939803510904312, "learning_rate": 6.179934135824062e-07, "loss": 0.4772, "step": 11746 }, { "epoch": 2.973171349025563, "grad_norm": 0.1876848191022873, "learning_rate": 6.171650266117724e-07, "loss": 0.4303, "step": 11747 }, { "epoch": 2.973424449506454, "grad_norm": 0.18775109946727753, "learning_rate": 6.163371775391646e-07, "loss": 0.4582, "step": 11748 }, { "epoch": 2.973677549987345, "grad_norm": 0.1927519291639328, "learning_rate": 6.155098664120396e-07, "loss": 0.4743, "step": 11749 }, { "epoch": 2.973930650468236, "grad_norm": 0.18280759453773499, "learning_rate": 6.146830932778291e-07, "loss": 0.4485, "step": 11750 }, { "epoch": 2.974183750949127, "grad_norm": 0.18573521077632904, "learning_rate": 6.1385685818393e-07, "loss": 0.4465, "step": 11751 }, { "epoch": 2.974436851430018, "grad_norm": 0.18957218527793884, "learning_rate": 6.130311611777051e-07, "loss": 0.4693, "step": 11752 }, { "epoch": 2.974689951910909, "grad_norm": 0.1933707594871521, "learning_rate": 6.122060023064924e-07, "loss": 0.4497, "step": 11753 }, { "epoch": 2.9749430523917995, "grad_norm": 0.19016434252262115, "learning_rate": 6.113813816175951e-07, "loss": 0.4533, "step": 11754 }, { "epoch": 2.9751961528726905, "grad_norm": 0.1835828572511673, "learning_rate": 6.105572991582875e-07, "loss": 0.436, "step": 11755 }, { "epoch": 2.9754492533535815, "grad_norm": 0.18420590460300446, "learning_rate": 6.097337549758131e-07, "loss": 0.468, "step": 11756 }, { "epoch": 2.975702353834472, "grad_norm": 0.18732218444347382, "learning_rate": 6.089107491173829e-07, "loss": 0.455, "step": 11757 }, { "epoch": 2.975955454315363, "grad_norm": 0.19663476943969727, "learning_rate": 6.080882816301792e-07, "loss": 0.4687, "step": 11758 }, { "epoch": 2.976208554796254, "grad_norm": 0.1918797343969345, "learning_rate": 6.072663525613509e-07, "loss": 0.4356, "step": 11759 }, { "epoch": 2.976461655277145, "grad_norm": 0.19326627254486084, "learning_rate": 6.064449619580193e-07, "loss": 0.4635, "step": 11760 }, { "epoch": 2.976714755758036, "grad_norm": 0.18573081493377686, "learning_rate": 6.056241098672721e-07, "loss": 0.4853, "step": 11761 }, { "epoch": 2.976967856238927, "grad_norm": 0.1987036168575287, "learning_rate": 6.048037963361663e-07, "loss": 0.4651, "step": 11762 }, { "epoch": 2.977220956719818, "grad_norm": 0.1857532411813736, "learning_rate": 6.039840214117298e-07, "loss": 0.4627, "step": 11763 }, { "epoch": 2.9774740572007086, "grad_norm": 0.18889285624027252, "learning_rate": 6.031647851409583e-07, "loss": 0.4794, "step": 11764 }, { "epoch": 2.9777271576815996, "grad_norm": 0.2088807225227356, "learning_rate": 6.023460875708165e-07, "loss": 0.4784, "step": 11765 }, { "epoch": 2.9779802581624906, "grad_norm": 0.1817900389432907, "learning_rate": 6.015279287482378e-07, "loss": 0.4612, "step": 11766 }, { "epoch": 2.9782333586433816, "grad_norm": 0.19130201637744904, "learning_rate": 6.007103087201271e-07, "loss": 0.4302, "step": 11767 }, { "epoch": 2.978486459124272, "grad_norm": 0.18874883651733398, "learning_rate": 5.998932275333569e-07, "loss": 0.4581, "step": 11768 }, { "epoch": 2.978739559605163, "grad_norm": 0.18429315090179443, "learning_rate": 5.990766852347674e-07, "loss": 0.46, "step": 11769 }, { "epoch": 2.978992660086054, "grad_norm": 0.1910531371831894, "learning_rate": 5.9826068187117e-07, "loss": 0.4605, "step": 11770 }, { "epoch": 2.979245760566945, "grad_norm": 0.1952236145734787, "learning_rate": 5.974452174893452e-07, "loss": 0.4583, "step": 11771 }, { "epoch": 2.979498861047836, "grad_norm": 0.1880480945110321, "learning_rate": 5.966302921360401e-07, "loss": 0.4597, "step": 11772 }, { "epoch": 2.979751961528727, "grad_norm": 0.1910647600889206, "learning_rate": 5.958159058579749e-07, "loss": 0.4542, "step": 11773 }, { "epoch": 2.9800050620096177, "grad_norm": 0.20231662690639496, "learning_rate": 5.950020587018324e-07, "loss": 0.4781, "step": 11774 }, { "epoch": 2.9802581624905087, "grad_norm": 0.1883973330259323, "learning_rate": 5.941887507142729e-07, "loss": 0.4287, "step": 11775 }, { "epoch": 2.9805112629713997, "grad_norm": 0.18831861019134521, "learning_rate": 5.933759819419205e-07, "loss": 0.436, "step": 11776 }, { "epoch": 2.9807643634522907, "grad_norm": 0.1878279745578766, "learning_rate": 5.925637524313699e-07, "loss": 0.4512, "step": 11777 }, { "epoch": 2.9810174639331812, "grad_norm": 0.18652276694774628, "learning_rate": 5.91752062229185e-07, "loss": 0.445, "step": 11778 }, { "epoch": 2.9812705644140722, "grad_norm": 0.18836620450019836, "learning_rate": 5.909409113818954e-07, "loss": 0.4433, "step": 11779 }, { "epoch": 2.9815236648949632, "grad_norm": 0.19108498096466064, "learning_rate": 5.901302999360037e-07, "loss": 0.4486, "step": 11780 }, { "epoch": 2.9817767653758542, "grad_norm": 0.19347545504570007, "learning_rate": 5.893202279379817e-07, "loss": 0.4624, "step": 11781 }, { "epoch": 2.9820298658567452, "grad_norm": 0.18585361540317535, "learning_rate": 5.88510695434269e-07, "loss": 0.4256, "step": 11782 }, { "epoch": 2.9822829663376362, "grad_norm": 0.1912563294172287, "learning_rate": 5.87701702471275e-07, "loss": 0.464, "step": 11783 }, { "epoch": 2.9825360668185272, "grad_norm": 0.19689859449863434, "learning_rate": 5.868932490953772e-07, "loss": 0.4552, "step": 11784 }, { "epoch": 2.982789167299418, "grad_norm": 0.18818646669387817, "learning_rate": 5.860853353529206e-07, "loss": 0.4677, "step": 11785 }, { "epoch": 2.983042267780309, "grad_norm": 0.18427351117134094, "learning_rate": 5.852779612902226e-07, "loss": 0.4642, "step": 11786 }, { "epoch": 2.9832953682612, "grad_norm": 0.18644307553768158, "learning_rate": 5.844711269535663e-07, "loss": 0.4663, "step": 11787 }, { "epoch": 2.9835484687420903, "grad_norm": 0.19044728577136993, "learning_rate": 5.836648323892102e-07, "loss": 0.441, "step": 11788 }, { "epoch": 2.9838015692229813, "grad_norm": 0.18350453674793243, "learning_rate": 5.82859077643374e-07, "loss": 0.447, "step": 11789 }, { "epoch": 2.9840546697038723, "grad_norm": 0.18359337747097015, "learning_rate": 5.820538627622519e-07, "loss": 0.4191, "step": 11790 }, { "epoch": 2.9843077701847633, "grad_norm": 0.19832295179367065, "learning_rate": 5.812491877920035e-07, "loss": 0.4702, "step": 11791 }, { "epoch": 2.9845608706656543, "grad_norm": 0.1945285052061081, "learning_rate": 5.804450527787597e-07, "loss": 0.4547, "step": 11792 }, { "epoch": 2.9848139711465453, "grad_norm": 0.18338216841220856, "learning_rate": 5.796414577686182e-07, "loss": 0.4689, "step": 11793 }, { "epoch": 2.9850670716274363, "grad_norm": 0.1897604763507843, "learning_rate": 5.788384028076499e-07, "loss": 0.4587, "step": 11794 }, { "epoch": 2.985320172108327, "grad_norm": 0.18644997477531433, "learning_rate": 5.780358879418935e-07, "loss": 0.4641, "step": 11795 }, { "epoch": 2.985573272589218, "grad_norm": 0.1869152933359146, "learning_rate": 5.772339132173521e-07, "loss": 0.4521, "step": 11796 }, { "epoch": 2.985826373070109, "grad_norm": 0.18682311475276947, "learning_rate": 5.764324786800024e-07, "loss": 0.4664, "step": 11797 }, { "epoch": 2.986079473551, "grad_norm": 0.1936790496110916, "learning_rate": 5.756315843757887e-07, "loss": 0.461, "step": 11798 }, { "epoch": 2.9863325740318905, "grad_norm": 0.18798358738422394, "learning_rate": 5.748312303506254e-07, "loss": 0.4476, "step": 11799 }, { "epoch": 2.9865856745127815, "grad_norm": 0.18284328281879425, "learning_rate": 5.740314166503924e-07, "loss": 0.4334, "step": 11800 }, { "epoch": 2.9868387749936725, "grad_norm": 0.18802393972873688, "learning_rate": 5.732321433209465e-07, "loss": 0.454, "step": 11801 }, { "epoch": 2.9870918754745635, "grad_norm": 0.18481431901454926, "learning_rate": 5.724334104081042e-07, "loss": 0.4593, "step": 11802 }, { "epoch": 2.9873449759554545, "grad_norm": 0.1870669424533844, "learning_rate": 5.716352179576567e-07, "loss": 0.4416, "step": 11803 }, { "epoch": 2.9875980764363455, "grad_norm": 0.1892787516117096, "learning_rate": 5.70837566015362e-07, "loss": 0.4745, "step": 11804 }, { "epoch": 2.987851176917236, "grad_norm": 0.1992531418800354, "learning_rate": 5.700404546269478e-07, "loss": 0.45, "step": 11805 }, { "epoch": 2.988104277398127, "grad_norm": 0.18963909149169922, "learning_rate": 5.692438838381109e-07, "loss": 0.4486, "step": 11806 }, { "epoch": 2.988357377879018, "grad_norm": 0.19802281260490417, "learning_rate": 5.684478536945182e-07, "loss": 0.4436, "step": 11807 }, { "epoch": 2.988610478359909, "grad_norm": 0.1926746666431427, "learning_rate": 5.67652364241803e-07, "loss": 0.4601, "step": 11808 }, { "epoch": 2.9888635788407996, "grad_norm": 0.18533720076084137, "learning_rate": 5.66857415525569e-07, "loss": 0.4784, "step": 11809 }, { "epoch": 2.9891166793216906, "grad_norm": 0.20400449633598328, "learning_rate": 5.660630075913898e-07, "loss": 0.4497, "step": 11810 }, { "epoch": 2.9893697798025816, "grad_norm": 0.19042539596557617, "learning_rate": 5.652691404848065e-07, "loss": 0.4177, "step": 11811 }, { "epoch": 2.9896228802834726, "grad_norm": 0.18798480927944183, "learning_rate": 5.644758142513296e-07, "loss": 0.4625, "step": 11812 }, { "epoch": 2.9898759807643636, "grad_norm": 0.18518711626529694, "learning_rate": 5.636830289364403e-07, "loss": 0.4498, "step": 11813 }, { "epoch": 2.9901290812452546, "grad_norm": 0.18554562330245972, "learning_rate": 5.628907845855858e-07, "loss": 0.4581, "step": 11814 }, { "epoch": 2.990382181726145, "grad_norm": 0.2003057897090912, "learning_rate": 5.62099081244184e-07, "loss": 0.4478, "step": 11815 }, { "epoch": 2.990635282207036, "grad_norm": 0.18586499989032745, "learning_rate": 5.613079189576209e-07, "loss": 0.4405, "step": 11816 }, { "epoch": 2.990888382687927, "grad_norm": 0.19827376306056976, "learning_rate": 5.605172977712558e-07, "loss": 0.455, "step": 11817 }, { "epoch": 2.991141483168818, "grad_norm": 0.19634340703487396, "learning_rate": 5.59727217730408e-07, "loss": 0.4673, "step": 11818 }, { "epoch": 2.9913945836497087, "grad_norm": 0.18813776969909668, "learning_rate": 5.589376788803735e-07, "loss": 0.4608, "step": 11819 }, { "epoch": 2.9916476841305997, "grad_norm": 0.18540968000888824, "learning_rate": 5.581486812664161e-07, "loss": 0.4578, "step": 11820 }, { "epoch": 2.9919007846114907, "grad_norm": 0.18288719654083252, "learning_rate": 5.573602249337661e-07, "loss": 0.4583, "step": 11821 }, { "epoch": 2.9921538850923817, "grad_norm": 0.193703293800354, "learning_rate": 5.565723099276254e-07, "loss": 0.4795, "step": 11822 }, { "epoch": 2.9924069855732727, "grad_norm": 0.18392619490623474, "learning_rate": 5.557849362931623e-07, "loss": 0.4388, "step": 11823 }, { "epoch": 2.9926600860541637, "grad_norm": 0.18764670193195343, "learning_rate": 5.549981040755137e-07, "loss": 0.4488, "step": 11824 }, { "epoch": 2.9929131865350547, "grad_norm": 0.19072985649108887, "learning_rate": 5.542118133197893e-07, "loss": 0.4331, "step": 11825 }, { "epoch": 2.9931662870159452, "grad_norm": 0.1866089403629303, "learning_rate": 5.534260640710642e-07, "loss": 0.4516, "step": 11826 }, { "epoch": 2.9934193874968362, "grad_norm": 0.22593863308429718, "learning_rate": 5.526408563743857e-07, "loss": 0.4764, "step": 11827 }, { "epoch": 2.9936724879777272, "grad_norm": 0.1908194124698639, "learning_rate": 5.518561902747677e-07, "loss": 0.4591, "step": 11828 }, { "epoch": 2.9939255884586182, "grad_norm": 0.19012349843978882, "learning_rate": 5.510720658171908e-07, "loss": 0.4399, "step": 11829 }, { "epoch": 2.994178688939509, "grad_norm": 0.1884472370147705, "learning_rate": 5.502884830466082e-07, "loss": 0.449, "step": 11830 }, { "epoch": 2.9944317894204, "grad_norm": 0.1896829754114151, "learning_rate": 5.495054420079415e-07, "loss": 0.4645, "step": 11831 }, { "epoch": 2.994684889901291, "grad_norm": 0.1874093860387802, "learning_rate": 5.487229427460828e-07, "loss": 0.4337, "step": 11832 }, { "epoch": 2.994937990382182, "grad_norm": 0.18890288472175598, "learning_rate": 5.479409853058882e-07, "loss": 0.4605, "step": 11833 }, { "epoch": 2.995191090863073, "grad_norm": 0.19269196689128876, "learning_rate": 5.471595697321885e-07, "loss": 0.4759, "step": 11834 }, { "epoch": 2.995444191343964, "grad_norm": 0.18547116219997406, "learning_rate": 5.463786960697781e-07, "loss": 0.4512, "step": 11835 }, { "epoch": 2.9956972918248543, "grad_norm": 0.1889868676662445, "learning_rate": 5.455983643634221e-07, "loss": 0.4472, "step": 11836 }, { "epoch": 2.9959503923057453, "grad_norm": 0.1881723701953888, "learning_rate": 5.448185746578571e-07, "loss": 0.4555, "step": 11837 }, { "epoch": 2.9962034927866363, "grad_norm": 0.2013358771800995, "learning_rate": 5.440393269977873e-07, "loss": 0.4637, "step": 11838 }, { "epoch": 2.9964565932675273, "grad_norm": 0.18867194652557373, "learning_rate": 5.432606214278857e-07, "loss": 0.4401, "step": 11839 }, { "epoch": 2.996709693748418, "grad_norm": 0.20814040303230286, "learning_rate": 5.424824579927912e-07, "loss": 0.4516, "step": 11840 }, { "epoch": 2.996962794229309, "grad_norm": 0.18435606360435486, "learning_rate": 5.417048367371158e-07, "loss": 0.4581, "step": 11841 }, { "epoch": 2.9972158947102, "grad_norm": 0.19467267394065857, "learning_rate": 5.409277577054383e-07, "loss": 0.4695, "step": 11842 }, { "epoch": 2.997468995191091, "grad_norm": 0.1910870224237442, "learning_rate": 5.401512209423088e-07, "loss": 0.4316, "step": 11843 }, { "epoch": 2.997722095671982, "grad_norm": 0.22541919350624084, "learning_rate": 5.393752264922403e-07, "loss": 0.4451, "step": 11844 }, { "epoch": 2.997975196152873, "grad_norm": 0.1833033412694931, "learning_rate": 5.385997743997251e-07, "loss": 0.4449, "step": 11845 }, { "epoch": 2.9982282966337634, "grad_norm": 0.18301238119602203, "learning_rate": 5.378248647092132e-07, "loss": 0.4461, "step": 11846 }, { "epoch": 2.9984813971146544, "grad_norm": 0.19071070849895477, "learning_rate": 5.370504974651314e-07, "loss": 0.4541, "step": 11847 }, { "epoch": 2.9987344975955454, "grad_norm": 0.1906665414571762, "learning_rate": 5.362766727118707e-07, "loss": 0.4403, "step": 11848 }, { "epoch": 2.9989875980764364, "grad_norm": 0.1866002082824707, "learning_rate": 5.355033904937934e-07, "loss": 0.4331, "step": 11849 }, { "epoch": 2.999240698557327, "grad_norm": 0.1894860714673996, "learning_rate": 5.347306508552286e-07, "loss": 0.4624, "step": 11850 }, { "epoch": 2.999493799038218, "grad_norm": 0.1890742927789688, "learning_rate": 5.339584538404796e-07, "loss": 0.4609, "step": 11851 }, { "epoch": 2.999746899519109, "grad_norm": 0.18505552411079407, "learning_rate": 5.331867994938123e-07, "loss": 0.4425, "step": 11852 }, { "epoch": 3.0, "grad_norm": 0.1881731152534485, "learning_rate": 5.324156878594645e-07, "loss": 0.4474, "step": 11853 }, { "epoch": 3.0, "eval_loss": 0.8066326975822449, "eval_runtime": 1038.539, "eval_samples_per_second": 40.941, "eval_steps_per_second": 0.64, "step": 11853 } ], "logging_steps": 1, "max_steps": 13221, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.09033140958215e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }