{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999146612049838, "eval_steps": 500, "global_step": 8787, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003413551800648575, "grad_norm": 3.122675254835389, "learning_rate": 0.0, "loss": 0.5106, "num_tokens": 195218.0, "step": 1 }, { "epoch": 0.000682710360129715, "grad_norm": 2.9605086705288617, "learning_rate": 5.6882821387940845e-08, "loss": 0.482, "num_tokens": 406566.0, "step": 2 }, { "epoch": 0.0010240655401945725, "grad_norm": 3.1233224493931564, "learning_rate": 1.1376564277588169e-07, "loss": 0.5003, "num_tokens": 588650.0, "step": 3 }, { "epoch": 0.00136542072025943, "grad_norm": 2.7682637219899244, "learning_rate": 1.7064846416382255e-07, "loss": 0.4658, "num_tokens": 781866.0, "step": 4 }, { "epoch": 0.0017067759003242873, "grad_norm": 3.4046721387690333, "learning_rate": 2.2753128555176338e-07, "loss": 0.5256, "num_tokens": 948239.0, "step": 5 }, { "epoch": 0.002048131080389145, "grad_norm": 3.026048406691866, "learning_rate": 2.8441410693970424e-07, "loss": 0.5213, "num_tokens": 1166564.0, "step": 6 }, { "epoch": 0.0023894862604540022, "grad_norm": 3.0347479829892055, "learning_rate": 3.412969283276451e-07, "loss": 0.5114, "num_tokens": 1402570.0, "step": 7 }, { "epoch": 0.00273084144051886, "grad_norm": 3.090935880589856, "learning_rate": 3.981797497155859e-07, "loss": 0.5029, "num_tokens": 1579245.0, "step": 8 }, { "epoch": 0.0030721966205837174, "grad_norm": 3.0904211456637602, "learning_rate": 4.5506257110352676e-07, "loss": 0.4901, "num_tokens": 1760235.0, "step": 9 }, { "epoch": 0.0034135518006485747, "grad_norm": 3.1222445461387216, "learning_rate": 5.119453924914676e-07, "loss": 0.4707, "num_tokens": 1931602.0, "step": 10 }, { "epoch": 0.0037549069807134325, "grad_norm": 2.9870258089188018, "learning_rate": 5.688282138794085e-07, "loss": 0.481, "num_tokens": 2134420.0, "step": 11 }, { "epoch": 0.00409626216077829, "grad_norm": 3.0037180084547157, "learning_rate": 6.257110352673493e-07, "loss": 0.4879, "num_tokens": 2313781.0, "step": 12 }, { "epoch": 0.004437617340843148, "grad_norm": 3.259817030610517, "learning_rate": 6.825938566552902e-07, "loss": 0.5087, "num_tokens": 2467457.0, "step": 13 }, { "epoch": 0.0047789725209080045, "grad_norm": 2.940230767753463, "learning_rate": 7.394766780432309e-07, "loss": 0.4628, "num_tokens": 2631613.0, "step": 14 }, { "epoch": 0.005120327700972862, "grad_norm": 2.885479357564472, "learning_rate": 7.963594994311718e-07, "loss": 0.4785, "num_tokens": 2805248.0, "step": 15 }, { "epoch": 0.00546168288103772, "grad_norm": 3.1705523139749214, "learning_rate": 8.532423208191127e-07, "loss": 0.4755, "num_tokens": 2958336.0, "step": 16 }, { "epoch": 0.005803038061102577, "grad_norm": 2.8018265395655018, "learning_rate": 9.101251422070535e-07, "loss": 0.483, "num_tokens": 3134277.0, "step": 17 }, { "epoch": 0.006144393241167435, "grad_norm": 2.652517679014235, "learning_rate": 9.670079635949945e-07, "loss": 0.4757, "num_tokens": 3341790.0, "step": 18 }, { "epoch": 0.0064857484212322925, "grad_norm": 2.517090163095806, "learning_rate": 1.0238907849829352e-06, "loss": 0.4607, "num_tokens": 3568223.0, "step": 19 }, { "epoch": 0.006827103601297149, "grad_norm": 2.1339918161755085, "learning_rate": 1.0807736063708762e-06, "loss": 0.4319, "num_tokens": 3751170.0, "step": 20 }, { "epoch": 0.007168458781362007, "grad_norm": 2.4583263253254612, "learning_rate": 1.137656427758817e-06, "loss": 0.4234, "num_tokens": 3904267.0, "step": 21 }, { "epoch": 0.007509813961426865, "grad_norm": 2.0398878085900467, "learning_rate": 1.194539249146758e-06, "loss": 0.4398, "num_tokens": 4109125.0, "step": 22 }, { "epoch": 0.007851169141491723, "grad_norm": 2.1829198280538287, "learning_rate": 1.2514220705346987e-06, "loss": 0.4965, "num_tokens": 4303227.0, "step": 23 }, { "epoch": 0.00819252432155658, "grad_norm": 1.8884964255314074, "learning_rate": 1.3083048919226394e-06, "loss": 0.4508, "num_tokens": 4522418.0, "step": 24 }, { "epoch": 0.008533879501621437, "grad_norm": 1.7533241869708998, "learning_rate": 1.3651877133105804e-06, "loss": 0.4038, "num_tokens": 4706847.0, "step": 25 }, { "epoch": 0.008875234681686295, "grad_norm": 1.5206627752716113, "learning_rate": 1.4220705346985211e-06, "loss": 0.4516, "num_tokens": 4883581.0, "step": 26 }, { "epoch": 0.009216589861751152, "grad_norm": 1.260515200679722, "learning_rate": 1.4789533560864619e-06, "loss": 0.4181, "num_tokens": 5049870.0, "step": 27 }, { "epoch": 0.009557945041816009, "grad_norm": 1.140863518154678, "learning_rate": 1.5358361774744028e-06, "loss": 0.453, "num_tokens": 5246150.0, "step": 28 }, { "epoch": 0.009899300221880868, "grad_norm": 1.0859417190956067, "learning_rate": 1.5927189988623436e-06, "loss": 0.4247, "num_tokens": 5444795.0, "step": 29 }, { "epoch": 0.010240655401945725, "grad_norm": 1.0098525451430775, "learning_rate": 1.6496018202502846e-06, "loss": 0.4193, "num_tokens": 5631065.0, "step": 30 }, { "epoch": 0.010582010582010581, "grad_norm": 0.8424954455576684, "learning_rate": 1.7064846416382253e-06, "loss": 0.4334, "num_tokens": 5853993.0, "step": 31 }, { "epoch": 0.01092336576207544, "grad_norm": 0.8838434850909672, "learning_rate": 1.763367463026166e-06, "loss": 0.4068, "num_tokens": 6047457.0, "step": 32 }, { "epoch": 0.011264720942140297, "grad_norm": 0.7882464121744107, "learning_rate": 1.820250284414107e-06, "loss": 0.4021, "num_tokens": 6281074.0, "step": 33 }, { "epoch": 0.011606076122205154, "grad_norm": 0.6635969884894837, "learning_rate": 1.877133105802048e-06, "loss": 0.4103, "num_tokens": 6467578.0, "step": 34 }, { "epoch": 0.011947431302270013, "grad_norm": 0.5118115315910249, "learning_rate": 1.934015927189989e-06, "loss": 0.3939, "num_tokens": 6662346.0, "step": 35 }, { "epoch": 0.01228878648233487, "grad_norm": 0.5565421534322703, "learning_rate": 1.9908987485779297e-06, "loss": 0.4158, "num_tokens": 6880388.0, "step": 36 }, { "epoch": 0.012630141662399726, "grad_norm": 0.5679514068314888, "learning_rate": 2.0477815699658705e-06, "loss": 0.3637, "num_tokens": 7080524.0, "step": 37 }, { "epoch": 0.012971496842464585, "grad_norm": 0.6314319198006353, "learning_rate": 2.1046643913538112e-06, "loss": 0.3859, "num_tokens": 7245557.0, "step": 38 }, { "epoch": 0.013312852022529442, "grad_norm": 0.6943049392856028, "learning_rate": 2.1615472127417524e-06, "loss": 0.4253, "num_tokens": 7457248.0, "step": 39 }, { "epoch": 0.013654207202594299, "grad_norm": 0.6190247412039139, "learning_rate": 2.218430034129693e-06, "loss": 0.3732, "num_tokens": 7653948.0, "step": 40 }, { "epoch": 0.013995562382659157, "grad_norm": 0.6406298703711564, "learning_rate": 2.275312855517634e-06, "loss": 0.3875, "num_tokens": 7808734.0, "step": 41 }, { "epoch": 0.014336917562724014, "grad_norm": 0.6101371713065508, "learning_rate": 2.3321956769055746e-06, "loss": 0.4026, "num_tokens": 8011456.0, "step": 42 }, { "epoch": 0.014678272742788871, "grad_norm": 0.5372293190150949, "learning_rate": 2.389078498293516e-06, "loss": 0.3724, "num_tokens": 8222793.0, "step": 43 }, { "epoch": 0.01501962792285373, "grad_norm": 0.5674335139843883, "learning_rate": 2.4459613196814566e-06, "loss": 0.3705, "num_tokens": 8367661.0, "step": 44 }, { "epoch": 0.015360983102918587, "grad_norm": 0.48032260756000655, "learning_rate": 2.5028441410693973e-06, "loss": 0.3569, "num_tokens": 8544402.0, "step": 45 }, { "epoch": 0.015702338282983445, "grad_norm": 0.46917188009554056, "learning_rate": 2.559726962457338e-06, "loss": 0.4138, "num_tokens": 8741688.0, "step": 46 }, { "epoch": 0.016043693463048302, "grad_norm": 0.4605972315340043, "learning_rate": 2.616609783845279e-06, "loss": 0.3735, "num_tokens": 8914428.0, "step": 47 }, { "epoch": 0.01638504864311316, "grad_norm": 0.42299798920119464, "learning_rate": 2.67349260523322e-06, "loss": 0.3858, "num_tokens": 9111990.0, "step": 48 }, { "epoch": 0.016726403823178016, "grad_norm": 0.37725965621907376, "learning_rate": 2.7303754266211608e-06, "loss": 0.3507, "num_tokens": 9301220.0, "step": 49 }, { "epoch": 0.017067759003242873, "grad_norm": 0.42229659208791137, "learning_rate": 2.7872582480091015e-06, "loss": 0.3943, "num_tokens": 9492929.0, "step": 50 }, { "epoch": 0.017409114183307733, "grad_norm": 0.41588303980575403, "learning_rate": 2.8441410693970423e-06, "loss": 0.3749, "num_tokens": 9677303.0, "step": 51 }, { "epoch": 0.01775046936337259, "grad_norm": 0.38546672785466196, "learning_rate": 2.901023890784983e-06, "loss": 0.356, "num_tokens": 9866288.0, "step": 52 }, { "epoch": 0.018091824543437447, "grad_norm": 0.4387778506284943, "learning_rate": 2.9579067121729238e-06, "loss": 0.3652, "num_tokens": 10032140.0, "step": 53 }, { "epoch": 0.018433179723502304, "grad_norm": 0.4166708574777135, "learning_rate": 3.014789533560865e-06, "loss": 0.3733, "num_tokens": 10229575.0, "step": 54 }, { "epoch": 0.01877453490356716, "grad_norm": 0.38541267352932246, "learning_rate": 3.0716723549488057e-06, "loss": 0.3424, "num_tokens": 10407271.0, "step": 55 }, { "epoch": 0.019115890083632018, "grad_norm": 0.4269633569922304, "learning_rate": 3.1285551763367464e-06, "loss": 0.3878, "num_tokens": 10557101.0, "step": 56 }, { "epoch": 0.01945724526369688, "grad_norm": 0.3645821505388399, "learning_rate": 3.185437997724687e-06, "loss": 0.3631, "num_tokens": 10757228.0, "step": 57 }, { "epoch": 0.019798600443761735, "grad_norm": 0.33373381863576507, "learning_rate": 3.242320819112628e-06, "loss": 0.3521, "num_tokens": 10956701.0, "step": 58 }, { "epoch": 0.020139955623826592, "grad_norm": 0.35213089440205025, "learning_rate": 3.299203640500569e-06, "loss": 0.3373, "num_tokens": 11179090.0, "step": 59 }, { "epoch": 0.02048131080389145, "grad_norm": 0.37767206329063646, "learning_rate": 3.35608646188851e-06, "loss": 0.3652, "num_tokens": 11350772.0, "step": 60 }, { "epoch": 0.020822665983956306, "grad_norm": 0.3701487649597873, "learning_rate": 3.4129692832764506e-06, "loss": 0.3406, "num_tokens": 11505141.0, "step": 61 }, { "epoch": 0.021164021164021163, "grad_norm": 0.3984098500233418, "learning_rate": 3.4698521046643914e-06, "loss": 0.412, "num_tokens": 11680368.0, "step": 62 }, { "epoch": 0.021505376344086023, "grad_norm": 0.3582103220791267, "learning_rate": 3.526734926052332e-06, "loss": 0.3681, "num_tokens": 11872798.0, "step": 63 }, { "epoch": 0.02184673152415088, "grad_norm": 0.38889091585818003, "learning_rate": 3.583617747440273e-06, "loss": 0.3478, "num_tokens": 11998954.0, "step": 64 }, { "epoch": 0.022188086704215737, "grad_norm": 0.3508066384064169, "learning_rate": 3.640500568828214e-06, "loss": 0.3924, "num_tokens": 12185688.0, "step": 65 }, { "epoch": 0.022529441884280594, "grad_norm": 0.3196599926066783, "learning_rate": 3.697383390216155e-06, "loss": 0.3666, "num_tokens": 12382209.0, "step": 66 }, { "epoch": 0.02287079706434545, "grad_norm": 0.2981913779217595, "learning_rate": 3.754266211604096e-06, "loss": 0.3682, "num_tokens": 12615625.0, "step": 67 }, { "epoch": 0.023212152244410308, "grad_norm": 0.3386106062795658, "learning_rate": 3.8111490329920367e-06, "loss": 0.3726, "num_tokens": 12833944.0, "step": 68 }, { "epoch": 0.023553507424475168, "grad_norm": 0.32660655309814485, "learning_rate": 3.868031854379978e-06, "loss": 0.3626, "num_tokens": 13043388.0, "step": 69 }, { "epoch": 0.023894862604540025, "grad_norm": 0.33420871106661854, "learning_rate": 3.924914675767919e-06, "loss": 0.3622, "num_tokens": 13198806.0, "step": 70 }, { "epoch": 0.024236217784604882, "grad_norm": 0.32367439322563474, "learning_rate": 3.981797497155859e-06, "loss": 0.3388, "num_tokens": 13378583.0, "step": 71 }, { "epoch": 0.02457757296466974, "grad_norm": 0.31897645235180694, "learning_rate": 4.0386803185438e-06, "loss": 0.3736, "num_tokens": 13566965.0, "step": 72 }, { "epoch": 0.024918928144734596, "grad_norm": 0.29907084174405013, "learning_rate": 4.095563139931741e-06, "loss": 0.3608, "num_tokens": 13771597.0, "step": 73 }, { "epoch": 0.025260283324799453, "grad_norm": 0.3636939684311507, "learning_rate": 4.152445961319682e-06, "loss": 0.3679, "num_tokens": 13924876.0, "step": 74 }, { "epoch": 0.025601638504864313, "grad_norm": 0.3032018018416808, "learning_rate": 4.2093287827076224e-06, "loss": 0.344, "num_tokens": 14110195.0, "step": 75 }, { "epoch": 0.02594299368492917, "grad_norm": 0.3127584609196273, "learning_rate": 4.266211604095564e-06, "loss": 0.3591, "num_tokens": 14306402.0, "step": 76 }, { "epoch": 0.026284348864994027, "grad_norm": 0.2758047237272156, "learning_rate": 4.323094425483505e-06, "loss": 0.3435, "num_tokens": 14537993.0, "step": 77 }, { "epoch": 0.026625704045058884, "grad_norm": 0.34422831681893756, "learning_rate": 4.3799772468714455e-06, "loss": 0.3883, "num_tokens": 14708791.0, "step": 78 }, { "epoch": 0.02696705922512374, "grad_norm": 0.30627629521798716, "learning_rate": 4.436860068259386e-06, "loss": 0.3794, "num_tokens": 14923633.0, "step": 79 }, { "epoch": 0.027308414405188598, "grad_norm": 0.33099072318177625, "learning_rate": 4.493742889647327e-06, "loss": 0.3467, "num_tokens": 15097052.0, "step": 80 }, { "epoch": 0.027649769585253458, "grad_norm": 0.34358215080583315, "learning_rate": 4.550625711035268e-06, "loss": 0.3491, "num_tokens": 15253492.0, "step": 81 }, { "epoch": 0.027991124765318315, "grad_norm": 0.3148295199558088, "learning_rate": 4.6075085324232085e-06, "loss": 0.3809, "num_tokens": 15447325.0, "step": 82 }, { "epoch": 0.028332479945383172, "grad_norm": 0.3206265984228895, "learning_rate": 4.664391353811149e-06, "loss": 0.3754, "num_tokens": 15657223.0, "step": 83 }, { "epoch": 0.02867383512544803, "grad_norm": 0.31347927682311616, "learning_rate": 4.72127417519909e-06, "loss": 0.3832, "num_tokens": 15881839.0, "step": 84 }, { "epoch": 0.029015190305512886, "grad_norm": 0.2895018151240942, "learning_rate": 4.778156996587032e-06, "loss": 0.3611, "num_tokens": 16097495.0, "step": 85 }, { "epoch": 0.029356545485577742, "grad_norm": 0.322563756346877, "learning_rate": 4.835039817974972e-06, "loss": 0.3536, "num_tokens": 16286926.0, "step": 86 }, { "epoch": 0.029697900665642603, "grad_norm": 0.31903331448857325, "learning_rate": 4.891922639362913e-06, "loss": 0.366, "num_tokens": 16494062.0, "step": 87 }, { "epoch": 0.03003925584570746, "grad_norm": 0.3586127832762205, "learning_rate": 4.948805460750854e-06, "loss": 0.3536, "num_tokens": 16689912.0, "step": 88 }, { "epoch": 0.030380611025772317, "grad_norm": 0.3234790097342354, "learning_rate": 5.005688282138795e-06, "loss": 0.3397, "num_tokens": 16845851.0, "step": 89 }, { "epoch": 0.030721966205837174, "grad_norm": 0.31994931019338635, "learning_rate": 5.062571103526735e-06, "loss": 0.3469, "num_tokens": 17020556.0, "step": 90 }, { "epoch": 0.03106332138590203, "grad_norm": 0.36085929311797593, "learning_rate": 5.119453924914676e-06, "loss": 0.3856, "num_tokens": 17166596.0, "step": 91 }, { "epoch": 0.03140467656596689, "grad_norm": 0.3006435323326647, "learning_rate": 5.176336746302617e-06, "loss": 0.3547, "num_tokens": 17379357.0, "step": 92 }, { "epoch": 0.031746031746031744, "grad_norm": 0.30849415289374915, "learning_rate": 5.233219567690558e-06, "loss": 0.3491, "num_tokens": 17549416.0, "step": 93 }, { "epoch": 0.032087386926096605, "grad_norm": 0.3984086823250832, "learning_rate": 5.290102389078499e-06, "loss": 0.3616, "num_tokens": 17752382.0, "step": 94 }, { "epoch": 0.03242874210616146, "grad_norm": 0.29061104582182934, "learning_rate": 5.34698521046644e-06, "loss": 0.3671, "num_tokens": 17951208.0, "step": 95 }, { "epoch": 0.03277009728622632, "grad_norm": 0.2966649371258956, "learning_rate": 5.403868031854381e-06, "loss": 0.3864, "num_tokens": 18147836.0, "step": 96 }, { "epoch": 0.03311145246629118, "grad_norm": 0.30011422889958966, "learning_rate": 5.4607508532423215e-06, "loss": 0.3648, "num_tokens": 18322542.0, "step": 97 }, { "epoch": 0.03345280764635603, "grad_norm": 0.27053474034203434, "learning_rate": 5.517633674630262e-06, "loss": 0.3366, "num_tokens": 18545994.0, "step": 98 }, { "epoch": 0.03379416282642089, "grad_norm": 0.3131192444018379, "learning_rate": 5.574516496018203e-06, "loss": 0.3705, "num_tokens": 18726481.0, "step": 99 }, { "epoch": 0.034135518006485746, "grad_norm": 0.30412937299880044, "learning_rate": 5.631399317406144e-06, "loss": 0.3159, "num_tokens": 18882128.0, "step": 100 }, { "epoch": 0.034476873186550606, "grad_norm": 0.30732051741897815, "learning_rate": 5.6882821387940845e-06, "loss": 0.3715, "num_tokens": 19063362.0, "step": 101 }, { "epoch": 0.03481822836661547, "grad_norm": 0.25295194488839684, "learning_rate": 5.745164960182025e-06, "loss": 0.32, "num_tokens": 19291798.0, "step": 102 }, { "epoch": 0.03515958354668032, "grad_norm": 0.3115797837973871, "learning_rate": 5.802047781569966e-06, "loss": 0.4153, "num_tokens": 19482841.0, "step": 103 }, { "epoch": 0.03550093872674518, "grad_norm": 0.30762138670491795, "learning_rate": 5.858930602957907e-06, "loss": 0.3358, "num_tokens": 19635508.0, "step": 104 }, { "epoch": 0.035842293906810034, "grad_norm": 0.2754167848398137, "learning_rate": 5.9158134243458475e-06, "loss": 0.3535, "num_tokens": 19866140.0, "step": 105 }, { "epoch": 0.036183649086874894, "grad_norm": 0.30019533839675844, "learning_rate": 5.972696245733789e-06, "loss": 0.3582, "num_tokens": 20056914.0, "step": 106 }, { "epoch": 0.03652500426693975, "grad_norm": 0.3149611136241558, "learning_rate": 6.02957906712173e-06, "loss": 0.3465, "num_tokens": 20227308.0, "step": 107 }, { "epoch": 0.03686635944700461, "grad_norm": 0.3185174775602885, "learning_rate": 6.086461888509671e-06, "loss": 0.3501, "num_tokens": 20425157.0, "step": 108 }, { "epoch": 0.03720771462706947, "grad_norm": 0.3004629440069001, "learning_rate": 6.143344709897611e-06, "loss": 0.3394, "num_tokens": 20605334.0, "step": 109 }, { "epoch": 0.03754906980713432, "grad_norm": 0.30611162456431923, "learning_rate": 6.200227531285552e-06, "loss": 0.3326, "num_tokens": 20768374.0, "step": 110 }, { "epoch": 0.03789042498719918, "grad_norm": 0.31091107096057846, "learning_rate": 6.257110352673493e-06, "loss": 0.3783, "num_tokens": 20943651.0, "step": 111 }, { "epoch": 0.038231780167264036, "grad_norm": 0.27988599797971936, "learning_rate": 6.313993174061434e-06, "loss": 0.334, "num_tokens": 21143548.0, "step": 112 }, { "epoch": 0.038573135347328896, "grad_norm": 0.29044421645782786, "learning_rate": 6.370875995449374e-06, "loss": 0.3513, "num_tokens": 21313423.0, "step": 113 }, { "epoch": 0.03891449052739376, "grad_norm": 0.3305458295873714, "learning_rate": 6.427758816837315e-06, "loss": 0.3641, "num_tokens": 21473237.0, "step": 114 }, { "epoch": 0.03925584570745861, "grad_norm": 0.27710985002874516, "learning_rate": 6.484641638225256e-06, "loss": 0.349, "num_tokens": 21678693.0, "step": 115 }, { "epoch": 0.03959720088752347, "grad_norm": 0.26755693736166536, "learning_rate": 6.5415244596131975e-06, "loss": 0.3412, "num_tokens": 21875067.0, "step": 116 }, { "epoch": 0.039938556067588324, "grad_norm": 0.29536382983253295, "learning_rate": 6.598407281001138e-06, "loss": 0.3928, "num_tokens": 22062985.0, "step": 117 }, { "epoch": 0.040279911247653184, "grad_norm": 0.3044672906587042, "learning_rate": 6.655290102389079e-06, "loss": 0.386, "num_tokens": 22247589.0, "step": 118 }, { "epoch": 0.04062126642771804, "grad_norm": 0.2892727645209725, "learning_rate": 6.71217292377702e-06, "loss": 0.3343, "num_tokens": 22442003.0, "step": 119 }, { "epoch": 0.0409626216077829, "grad_norm": 0.3191660205972268, "learning_rate": 6.7690557451649605e-06, "loss": 0.3647, "num_tokens": 22612123.0, "step": 120 }, { "epoch": 0.04130397678784776, "grad_norm": 0.2863862724195555, "learning_rate": 6.825938566552901e-06, "loss": 0.3651, "num_tokens": 22832334.0, "step": 121 }, { "epoch": 0.04164533196791261, "grad_norm": 0.29559246011187323, "learning_rate": 6.882821387940842e-06, "loss": 0.331, "num_tokens": 23035407.0, "step": 122 }, { "epoch": 0.04198668714797747, "grad_norm": 0.32663873759339934, "learning_rate": 6.939704209328783e-06, "loss": 0.3445, "num_tokens": 23213527.0, "step": 123 }, { "epoch": 0.042328042328042326, "grad_norm": 0.31569402228951865, "learning_rate": 6.9965870307167235e-06, "loss": 0.348, "num_tokens": 23359535.0, "step": 124 }, { "epoch": 0.042669397508107186, "grad_norm": 0.29032092654827574, "learning_rate": 7.053469852104664e-06, "loss": 0.3744, "num_tokens": 23566898.0, "step": 125 }, { "epoch": 0.043010752688172046, "grad_norm": 0.2590932579200146, "learning_rate": 7.110352673492605e-06, "loss": 0.3781, "num_tokens": 23821008.0, "step": 126 }, { "epoch": 0.0433521078682369, "grad_norm": 0.4195656620922032, "learning_rate": 7.167235494880546e-06, "loss": 0.3995, "num_tokens": 23987627.0, "step": 127 }, { "epoch": 0.04369346304830176, "grad_norm": 0.28668665256743514, "learning_rate": 7.224118316268487e-06, "loss": 0.3492, "num_tokens": 24201332.0, "step": 128 }, { "epoch": 0.044034818228366614, "grad_norm": 0.2819186173780789, "learning_rate": 7.281001137656428e-06, "loss": 0.3792, "num_tokens": 24412339.0, "step": 129 }, { "epoch": 0.044376173408431474, "grad_norm": 0.3206153932599293, "learning_rate": 7.337883959044369e-06, "loss": 0.3418, "num_tokens": 24573571.0, "step": 130 }, { "epoch": 0.04471752858849633, "grad_norm": 0.2767790470786125, "learning_rate": 7.39476678043231e-06, "loss": 0.3608, "num_tokens": 24781424.0, "step": 131 }, { "epoch": 0.04505888376856119, "grad_norm": 0.31868954933395316, "learning_rate": 7.45164960182025e-06, "loss": 0.3538, "num_tokens": 24972889.0, "step": 132 }, { "epoch": 0.04540023894862605, "grad_norm": 0.3034530570534503, "learning_rate": 7.508532423208192e-06, "loss": 0.3605, "num_tokens": 25172921.0, "step": 133 }, { "epoch": 0.0457415941286909, "grad_norm": 0.3059754068687175, "learning_rate": 7.565415244596133e-06, "loss": 0.3567, "num_tokens": 25362829.0, "step": 134 }, { "epoch": 0.04608294930875576, "grad_norm": 0.2873556672473987, "learning_rate": 7.6222980659840735e-06, "loss": 0.3776, "num_tokens": 25580690.0, "step": 135 }, { "epoch": 0.046424304488820615, "grad_norm": 0.27173453536689923, "learning_rate": 7.679180887372015e-06, "loss": 0.3712, "num_tokens": 25789977.0, "step": 136 }, { "epoch": 0.046765659668885476, "grad_norm": 0.2626624405898973, "learning_rate": 7.736063708759956e-06, "loss": 0.3607, "num_tokens": 26043593.0, "step": 137 }, { "epoch": 0.047107014848950336, "grad_norm": 0.3110168124388157, "learning_rate": 7.792946530147897e-06, "loss": 0.35, "num_tokens": 26221175.0, "step": 138 }, { "epoch": 0.04744837002901519, "grad_norm": 0.29059668978522885, "learning_rate": 7.849829351535837e-06, "loss": 0.3539, "num_tokens": 26418674.0, "step": 139 }, { "epoch": 0.04778972520908005, "grad_norm": 0.3305557333848681, "learning_rate": 7.906712172923778e-06, "loss": 0.3472, "num_tokens": 26604952.0, "step": 140 }, { "epoch": 0.048131080389144903, "grad_norm": 0.3155570364094112, "learning_rate": 7.963594994311719e-06, "loss": 0.3256, "num_tokens": 26755603.0, "step": 141 }, { "epoch": 0.048472435569209764, "grad_norm": 0.3015406159083273, "learning_rate": 8.02047781569966e-06, "loss": 0.3442, "num_tokens": 26965088.0, "step": 142 }, { "epoch": 0.04881379074927462, "grad_norm": 0.3200407203738076, "learning_rate": 8.0773606370876e-06, "loss": 0.3221, "num_tokens": 27139570.0, "step": 143 }, { "epoch": 0.04915514592933948, "grad_norm": 0.30805701575261524, "learning_rate": 8.134243458475541e-06, "loss": 0.3618, "num_tokens": 27311300.0, "step": 144 }, { "epoch": 0.04949650110940434, "grad_norm": 0.36314976411451766, "learning_rate": 8.191126279863482e-06, "loss": 0.3755, "num_tokens": 27480273.0, "step": 145 }, { "epoch": 0.04983785628946919, "grad_norm": 0.29742666936047635, "learning_rate": 8.248009101251423e-06, "loss": 0.3421, "num_tokens": 27666571.0, "step": 146 }, { "epoch": 0.05017921146953405, "grad_norm": 0.29309032171427635, "learning_rate": 8.304891922639363e-06, "loss": 0.3242, "num_tokens": 27849356.0, "step": 147 }, { "epoch": 0.050520566649598905, "grad_norm": 0.30578770502832714, "learning_rate": 8.361774744027304e-06, "loss": 0.3553, "num_tokens": 28028083.0, "step": 148 }, { "epoch": 0.050861921829663766, "grad_norm": 0.2822473078303481, "learning_rate": 8.418657565415245e-06, "loss": 0.3656, "num_tokens": 28238456.0, "step": 149 }, { "epoch": 0.051203277009728626, "grad_norm": 0.2808785606377243, "learning_rate": 8.475540386803186e-06, "loss": 0.3175, "num_tokens": 28405550.0, "step": 150 }, { "epoch": 0.05154463218979348, "grad_norm": 0.3150744322272409, "learning_rate": 8.532423208191128e-06, "loss": 0.3546, "num_tokens": 28623854.0, "step": 151 }, { "epoch": 0.05188598736985834, "grad_norm": 0.27748071767975657, "learning_rate": 8.589306029579069e-06, "loss": 0.3598, "num_tokens": 28813096.0, "step": 152 }, { "epoch": 0.05222734254992319, "grad_norm": 0.2766380173929495, "learning_rate": 8.64618885096701e-06, "loss": 0.3371, "num_tokens": 29034233.0, "step": 153 }, { "epoch": 0.052568697729988054, "grad_norm": 0.27736117846325603, "learning_rate": 8.70307167235495e-06, "loss": 0.342, "num_tokens": 29221820.0, "step": 154 }, { "epoch": 0.05291005291005291, "grad_norm": 0.26333421652305594, "learning_rate": 8.759954493742891e-06, "loss": 0.3684, "num_tokens": 29450427.0, "step": 155 }, { "epoch": 0.05325140809011777, "grad_norm": 0.2774195008426368, "learning_rate": 8.816837315130832e-06, "loss": 0.3472, "num_tokens": 29649783.0, "step": 156 }, { "epoch": 0.05359276327018263, "grad_norm": 0.31028737950378465, "learning_rate": 8.873720136518773e-06, "loss": 0.3924, "num_tokens": 29850923.0, "step": 157 }, { "epoch": 0.05393411845024748, "grad_norm": 0.3145368222177463, "learning_rate": 8.930602957906713e-06, "loss": 0.344, "num_tokens": 30003552.0, "step": 158 }, { "epoch": 0.05427547363031234, "grad_norm": 0.30751199930713113, "learning_rate": 8.987485779294654e-06, "loss": 0.3743, "num_tokens": 30215036.0, "step": 159 }, { "epoch": 0.054616828810377195, "grad_norm": 0.5580492462275177, "learning_rate": 9.044368600682595e-06, "loss": 0.3351, "num_tokens": 30381298.0, "step": 160 }, { "epoch": 0.054958183990442055, "grad_norm": 0.30368799860168816, "learning_rate": 9.101251422070536e-06, "loss": 0.3742, "num_tokens": 30578222.0, "step": 161 }, { "epoch": 0.055299539170506916, "grad_norm": 0.27793058993057884, "learning_rate": 9.158134243458476e-06, "loss": 0.3501, "num_tokens": 30772285.0, "step": 162 }, { "epoch": 0.05564089435057177, "grad_norm": 0.3325786979435578, "learning_rate": 9.215017064846417e-06, "loss": 0.3517, "num_tokens": 30947308.0, "step": 163 }, { "epoch": 0.05598224953063663, "grad_norm": 0.30747504563502237, "learning_rate": 9.271899886234358e-06, "loss": 0.3509, "num_tokens": 31137923.0, "step": 164 }, { "epoch": 0.05632360471070148, "grad_norm": 0.2783804241100197, "learning_rate": 9.328782707622299e-06, "loss": 0.324, "num_tokens": 31329230.0, "step": 165 }, { "epoch": 0.056664959890766343, "grad_norm": 0.293555148508486, "learning_rate": 9.38566552901024e-06, "loss": 0.3249, "num_tokens": 31506443.0, "step": 166 }, { "epoch": 0.0570063150708312, "grad_norm": 0.3288136148195218, "learning_rate": 9.44254835039818e-06, "loss": 0.3727, "num_tokens": 31697905.0, "step": 167 }, { "epoch": 0.05734767025089606, "grad_norm": 0.3232971265401224, "learning_rate": 9.49943117178612e-06, "loss": 0.3705, "num_tokens": 31904863.0, "step": 168 }, { "epoch": 0.05768902543096092, "grad_norm": 0.29919658973367547, "learning_rate": 9.556313993174063e-06, "loss": 0.3792, "num_tokens": 32098631.0, "step": 169 }, { "epoch": 0.05803038061102577, "grad_norm": 0.28801998293376346, "learning_rate": 9.613196814562004e-06, "loss": 0.3505, "num_tokens": 32312417.0, "step": 170 }, { "epoch": 0.05837173579109063, "grad_norm": 0.3139885033173206, "learning_rate": 9.670079635949945e-06, "loss": 0.3181, "num_tokens": 32469308.0, "step": 171 }, { "epoch": 0.058713090971155485, "grad_norm": 0.2914896508946836, "learning_rate": 9.726962457337886e-06, "loss": 0.4038, "num_tokens": 32705439.0, "step": 172 }, { "epoch": 0.059054446151220345, "grad_norm": 0.2899248125656775, "learning_rate": 9.783845278725826e-06, "loss": 0.3553, "num_tokens": 32887153.0, "step": 173 }, { "epoch": 0.059395801331285206, "grad_norm": 0.30896029540083164, "learning_rate": 9.840728100113767e-06, "loss": 0.3604, "num_tokens": 33053931.0, "step": 174 }, { "epoch": 0.05973715651135006, "grad_norm": 0.33006591928771745, "learning_rate": 9.897610921501708e-06, "loss": 0.3516, "num_tokens": 33214381.0, "step": 175 }, { "epoch": 0.06007851169141492, "grad_norm": 0.29873146461538047, "learning_rate": 9.954493742889649e-06, "loss": 0.3825, "num_tokens": 33448587.0, "step": 176 }, { "epoch": 0.06041986687147977, "grad_norm": 0.32378279882727323, "learning_rate": 1.001137656427759e-05, "loss": 0.3566, "num_tokens": 33624334.0, "step": 177 }, { "epoch": 0.06076122205154463, "grad_norm": 0.31632963592414326, "learning_rate": 1.006825938566553e-05, "loss": 0.3507, "num_tokens": 33824036.0, "step": 178 }, { "epoch": 0.06110257723160949, "grad_norm": 0.30863594688303214, "learning_rate": 1.012514220705347e-05, "loss": 0.3324, "num_tokens": 33980670.0, "step": 179 }, { "epoch": 0.06144393241167435, "grad_norm": 0.3045364942520786, "learning_rate": 1.0182025028441412e-05, "loss": 0.3496, "num_tokens": 34188000.0, "step": 180 }, { "epoch": 0.06178528759173921, "grad_norm": 0.32169957661219684, "learning_rate": 1.0238907849829352e-05, "loss": 0.3695, "num_tokens": 34366670.0, "step": 181 }, { "epoch": 0.06212664277180406, "grad_norm": 0.3375484693073356, "learning_rate": 1.0295790671217293e-05, "loss": 0.3471, "num_tokens": 34559012.0, "step": 182 }, { "epoch": 0.06246799795186892, "grad_norm": 0.31231503499475677, "learning_rate": 1.0352673492605234e-05, "loss": 0.3779, "num_tokens": 34753722.0, "step": 183 }, { "epoch": 0.06280935313193378, "grad_norm": 0.3246415798620299, "learning_rate": 1.0409556313993175e-05, "loss": 0.3317, "num_tokens": 34916973.0, "step": 184 }, { "epoch": 0.06315070831199864, "grad_norm": 0.2761432088584156, "learning_rate": 1.0466439135381115e-05, "loss": 0.371, "num_tokens": 35117139.0, "step": 185 }, { "epoch": 0.06349206349206349, "grad_norm": 0.3215583384547561, "learning_rate": 1.0523321956769056e-05, "loss": 0.3452, "num_tokens": 35303526.0, "step": 186 }, { "epoch": 0.06383341867212836, "grad_norm": 0.33408336370367026, "learning_rate": 1.0580204778156999e-05, "loss": 0.3584, "num_tokens": 35456446.0, "step": 187 }, { "epoch": 0.06417477385219321, "grad_norm": 0.3028255393996406, "learning_rate": 1.063708759954494e-05, "loss": 0.3663, "num_tokens": 35648055.0, "step": 188 }, { "epoch": 0.06451612903225806, "grad_norm": 0.29077985398089723, "learning_rate": 1.069397042093288e-05, "loss": 0.3484, "num_tokens": 35842924.0, "step": 189 }, { "epoch": 0.06485748421232292, "grad_norm": 0.2986798787440169, "learning_rate": 1.075085324232082e-05, "loss": 0.3443, "num_tokens": 36024166.0, "step": 190 }, { "epoch": 0.06519883939238778, "grad_norm": 0.33130017997050787, "learning_rate": 1.0807736063708762e-05, "loss": 0.3725, "num_tokens": 36204527.0, "step": 191 }, { "epoch": 0.06554019457245264, "grad_norm": 0.3364059333583251, "learning_rate": 1.0864618885096702e-05, "loss": 0.3841, "num_tokens": 36382430.0, "step": 192 }, { "epoch": 0.06588154975251749, "grad_norm": 0.29051436485096876, "learning_rate": 1.0921501706484643e-05, "loss": 0.3672, "num_tokens": 36595859.0, "step": 193 }, { "epoch": 0.06622290493258236, "grad_norm": 0.35243792454602363, "learning_rate": 1.0978384527872584e-05, "loss": 0.3903, "num_tokens": 36788210.0, "step": 194 }, { "epoch": 0.06656426011264721, "grad_norm": 0.2918301740209208, "learning_rate": 1.1035267349260525e-05, "loss": 0.3815, "num_tokens": 37002037.0, "step": 195 }, { "epoch": 0.06690561529271206, "grad_norm": 0.338850189473043, "learning_rate": 1.1092150170648465e-05, "loss": 0.3493, "num_tokens": 37185514.0, "step": 196 }, { "epoch": 0.06724697047277692, "grad_norm": 0.2690096042629072, "learning_rate": 1.1149032992036406e-05, "loss": 0.334, "num_tokens": 37381375.0, "step": 197 }, { "epoch": 0.06758832565284179, "grad_norm": 0.29737385373832925, "learning_rate": 1.1205915813424347e-05, "loss": 0.3372, "num_tokens": 37536286.0, "step": 198 }, { "epoch": 0.06792968083290664, "grad_norm": 0.30292198083700084, "learning_rate": 1.1262798634812288e-05, "loss": 0.3411, "num_tokens": 37711584.0, "step": 199 }, { "epoch": 0.06827103601297149, "grad_norm": 0.294119909215978, "learning_rate": 1.1319681456200228e-05, "loss": 0.3539, "num_tokens": 37911421.0, "step": 200 }, { "epoch": 0.06861239119303636, "grad_norm": 0.33276484516416194, "learning_rate": 1.1376564277588169e-05, "loss": 0.3651, "num_tokens": 38082135.0, "step": 201 }, { "epoch": 0.06895374637310121, "grad_norm": 0.30711413264170373, "learning_rate": 1.143344709897611e-05, "loss": 0.3757, "num_tokens": 38309065.0, "step": 202 }, { "epoch": 0.06929510155316607, "grad_norm": 0.3059562098730968, "learning_rate": 1.149032992036405e-05, "loss": 0.3717, "num_tokens": 38496880.0, "step": 203 }, { "epoch": 0.06963645673323093, "grad_norm": 0.31649577473566676, "learning_rate": 1.1547212741751991e-05, "loss": 0.3805, "num_tokens": 38686738.0, "step": 204 }, { "epoch": 0.06997781191329579, "grad_norm": 0.29898849435508823, "learning_rate": 1.1604095563139932e-05, "loss": 0.3322, "num_tokens": 38867091.0, "step": 205 }, { "epoch": 0.07031916709336064, "grad_norm": 0.32617941677274825, "learning_rate": 1.1660978384527873e-05, "loss": 0.3587, "num_tokens": 39027145.0, "step": 206 }, { "epoch": 0.0706605222734255, "grad_norm": 0.3038850286260691, "learning_rate": 1.1717861205915814e-05, "loss": 0.3638, "num_tokens": 39219183.0, "step": 207 }, { "epoch": 0.07100187745349036, "grad_norm": 0.3118761736249134, "learning_rate": 1.1774744027303754e-05, "loss": 0.3307, "num_tokens": 39376674.0, "step": 208 }, { "epoch": 0.07134323263355521, "grad_norm": 0.3356518196811603, "learning_rate": 1.1831626848691695e-05, "loss": 0.3848, "num_tokens": 39564290.0, "step": 209 }, { "epoch": 0.07168458781362007, "grad_norm": 0.3016386502390701, "learning_rate": 1.1888509670079638e-05, "loss": 0.3707, "num_tokens": 39793003.0, "step": 210 }, { "epoch": 0.07202594299368494, "grad_norm": 0.29958539367414244, "learning_rate": 1.1945392491467578e-05, "loss": 0.3613, "num_tokens": 39990322.0, "step": 211 }, { "epoch": 0.07236729817374979, "grad_norm": 0.36156577880077767, "learning_rate": 1.2002275312855519e-05, "loss": 0.359, "num_tokens": 40177664.0, "step": 212 }, { "epoch": 0.07270865335381464, "grad_norm": 0.30585793028147634, "learning_rate": 1.205915813424346e-05, "loss": 0.3504, "num_tokens": 40374111.0, "step": 213 }, { "epoch": 0.0730500085338795, "grad_norm": 0.3401087565640465, "learning_rate": 1.21160409556314e-05, "loss": 0.3577, "num_tokens": 40558203.0, "step": 214 }, { "epoch": 0.07339136371394436, "grad_norm": 0.2851212219028911, "learning_rate": 1.2172923777019341e-05, "loss": 0.3576, "num_tokens": 40756049.0, "step": 215 }, { "epoch": 0.07373271889400922, "grad_norm": 0.33751349822241533, "learning_rate": 1.2229806598407282e-05, "loss": 0.3538, "num_tokens": 40922127.0, "step": 216 }, { "epoch": 0.07407407407407407, "grad_norm": 0.3136920541825252, "learning_rate": 1.2286689419795223e-05, "loss": 0.3802, "num_tokens": 41108106.0, "step": 217 }, { "epoch": 0.07441542925413894, "grad_norm": 0.3213641158911109, "learning_rate": 1.2343572241183164e-05, "loss": 0.3779, "num_tokens": 41301648.0, "step": 218 }, { "epoch": 0.07475678443420379, "grad_norm": 0.28836202852027853, "learning_rate": 1.2400455062571104e-05, "loss": 0.3728, "num_tokens": 41530957.0, "step": 219 }, { "epoch": 0.07509813961426864, "grad_norm": 0.2976461238975202, "learning_rate": 1.2457337883959045e-05, "loss": 0.3587, "num_tokens": 41776351.0, "step": 220 }, { "epoch": 0.0754394947943335, "grad_norm": 0.33263676137182085, "learning_rate": 1.2514220705346986e-05, "loss": 0.359, "num_tokens": 41944464.0, "step": 221 }, { "epoch": 0.07578084997439836, "grad_norm": 0.3052858165414697, "learning_rate": 1.2571103526734927e-05, "loss": 0.3599, "num_tokens": 42150651.0, "step": 222 }, { "epoch": 0.07612220515446322, "grad_norm": 0.32003369437015455, "learning_rate": 1.2627986348122867e-05, "loss": 0.3653, "num_tokens": 42311315.0, "step": 223 }, { "epoch": 0.07646356033452807, "grad_norm": 0.3065484793641115, "learning_rate": 1.2684869169510808e-05, "loss": 0.3431, "num_tokens": 42492392.0, "step": 224 }, { "epoch": 0.07680491551459294, "grad_norm": 0.29465767430841844, "learning_rate": 1.2741751990898749e-05, "loss": 0.3447, "num_tokens": 42684069.0, "step": 225 }, { "epoch": 0.07714627069465779, "grad_norm": 0.3758897497464481, "learning_rate": 1.279863481228669e-05, "loss": 0.3782, "num_tokens": 42844798.0, "step": 226 }, { "epoch": 0.07748762587472265, "grad_norm": 0.3155466266095052, "learning_rate": 1.285551763367463e-05, "loss": 0.3773, "num_tokens": 43036218.0, "step": 227 }, { "epoch": 0.07782898105478751, "grad_norm": 0.27619377872609135, "learning_rate": 1.2912400455062571e-05, "loss": 0.3313, "num_tokens": 43228394.0, "step": 228 }, { "epoch": 0.07817033623485237, "grad_norm": 0.3149014763269292, "learning_rate": 1.2969283276450512e-05, "loss": 0.3504, "num_tokens": 43421818.0, "step": 229 }, { "epoch": 0.07851169141491722, "grad_norm": 0.4521692794360302, "learning_rate": 1.3026166097838454e-05, "loss": 0.4014, "num_tokens": 43617898.0, "step": 230 }, { "epoch": 0.07885304659498207, "grad_norm": 0.30731211416589405, "learning_rate": 1.3083048919226395e-05, "loss": 0.3628, "num_tokens": 43792884.0, "step": 231 }, { "epoch": 0.07919440177504694, "grad_norm": 0.3395114506265274, "learning_rate": 1.3139931740614336e-05, "loss": 0.3573, "num_tokens": 43953517.0, "step": 232 }, { "epoch": 0.0795357569551118, "grad_norm": 0.29801827079898546, "learning_rate": 1.3196814562002277e-05, "loss": 0.3738, "num_tokens": 44172197.0, "step": 233 }, { "epoch": 0.07987711213517665, "grad_norm": 0.298845678839733, "learning_rate": 1.3253697383390217e-05, "loss": 0.3483, "num_tokens": 44373021.0, "step": 234 }, { "epoch": 0.08021846731524152, "grad_norm": 0.3291310925566423, "learning_rate": 1.3310580204778158e-05, "loss": 0.3411, "num_tokens": 44541295.0, "step": 235 }, { "epoch": 0.08055982249530637, "grad_norm": 0.3401650664794016, "learning_rate": 1.3367463026166099e-05, "loss": 0.3577, "num_tokens": 44703822.0, "step": 236 }, { "epoch": 0.08090117767537122, "grad_norm": 0.2839475417563606, "learning_rate": 1.342434584755404e-05, "loss": 0.3238, "num_tokens": 44898843.0, "step": 237 }, { "epoch": 0.08124253285543608, "grad_norm": 0.3009328019201451, "learning_rate": 1.348122866894198e-05, "loss": 0.3436, "num_tokens": 45090273.0, "step": 238 }, { "epoch": 0.08158388803550094, "grad_norm": 0.3445962817211486, "learning_rate": 1.3538111490329921e-05, "loss": 0.3825, "num_tokens": 45283196.0, "step": 239 }, { "epoch": 0.0819252432155658, "grad_norm": 0.2910078304984117, "learning_rate": 1.3594994311717862e-05, "loss": 0.3654, "num_tokens": 45481532.0, "step": 240 }, { "epoch": 0.08226659839563065, "grad_norm": 0.3225097260473356, "learning_rate": 1.3651877133105803e-05, "loss": 0.3738, "num_tokens": 45672515.0, "step": 241 }, { "epoch": 0.08260795357569552, "grad_norm": 0.3712387122805429, "learning_rate": 1.3708759954493743e-05, "loss": 0.3801, "num_tokens": 45817293.0, "step": 242 }, { "epoch": 0.08294930875576037, "grad_norm": 0.31643781606686716, "learning_rate": 1.3765642775881684e-05, "loss": 0.3577, "num_tokens": 46005082.0, "step": 243 }, { "epoch": 0.08329066393582522, "grad_norm": 0.3034951637032292, "learning_rate": 1.3822525597269625e-05, "loss": 0.3458, "num_tokens": 46179599.0, "step": 244 }, { "epoch": 0.08363201911589008, "grad_norm": 0.31782926883612667, "learning_rate": 1.3879408418657566e-05, "loss": 0.3763, "num_tokens": 46361931.0, "step": 245 }, { "epoch": 0.08397337429595494, "grad_norm": 0.30211268648367745, "learning_rate": 1.3936291240045506e-05, "loss": 0.352, "num_tokens": 46560921.0, "step": 246 }, { "epoch": 0.0843147294760198, "grad_norm": 0.31084102454094764, "learning_rate": 1.3993174061433447e-05, "loss": 0.3717, "num_tokens": 46753910.0, "step": 247 }, { "epoch": 0.08465608465608465, "grad_norm": 0.28612081540579976, "learning_rate": 1.4050056882821388e-05, "loss": 0.3497, "num_tokens": 46988915.0, "step": 248 }, { "epoch": 0.08499743983614952, "grad_norm": 0.3183119484603906, "learning_rate": 1.4106939704209329e-05, "loss": 0.3736, "num_tokens": 47214290.0, "step": 249 }, { "epoch": 0.08533879501621437, "grad_norm": 0.30834193717941116, "learning_rate": 1.416382252559727e-05, "loss": 0.3683, "num_tokens": 47401921.0, "step": 250 }, { "epoch": 0.08568015019627923, "grad_norm": 0.3006035000655092, "learning_rate": 1.422070534698521e-05, "loss": 0.3555, "num_tokens": 47607777.0, "step": 251 }, { "epoch": 0.08602150537634409, "grad_norm": 0.305040369358788, "learning_rate": 1.427758816837315e-05, "loss": 0.3985, "num_tokens": 47791912.0, "step": 252 }, { "epoch": 0.08636286055640895, "grad_norm": 0.29779600762287944, "learning_rate": 1.4334470989761092e-05, "loss": 0.3442, "num_tokens": 47974600.0, "step": 253 }, { "epoch": 0.0867042157364738, "grad_norm": 0.3173371882772404, "learning_rate": 1.4391353811149032e-05, "loss": 0.3656, "num_tokens": 48155535.0, "step": 254 }, { "epoch": 0.08704557091653865, "grad_norm": 0.29844724915190746, "learning_rate": 1.4448236632536975e-05, "loss": 0.3629, "num_tokens": 48351186.0, "step": 255 }, { "epoch": 0.08738692609660352, "grad_norm": 0.32307293278123356, "learning_rate": 1.4505119453924915e-05, "loss": 0.3643, "num_tokens": 48535740.0, "step": 256 }, { "epoch": 0.08772828127666837, "grad_norm": 0.33462670150786655, "learning_rate": 1.4562002275312856e-05, "loss": 0.3718, "num_tokens": 48721449.0, "step": 257 }, { "epoch": 0.08806963645673323, "grad_norm": 0.3102772958287875, "learning_rate": 1.4618885096700797e-05, "loss": 0.3325, "num_tokens": 48897336.0, "step": 258 }, { "epoch": 0.0884109916367981, "grad_norm": 0.3184075538435929, "learning_rate": 1.4675767918088738e-05, "loss": 0.3679, "num_tokens": 49094580.0, "step": 259 }, { "epoch": 0.08875234681686295, "grad_norm": 0.30418362177361896, "learning_rate": 1.4732650739476679e-05, "loss": 0.3898, "num_tokens": 49300319.0, "step": 260 }, { "epoch": 0.0890937019969278, "grad_norm": 0.3118691805868342, "learning_rate": 1.478953356086462e-05, "loss": 0.3831, "num_tokens": 49512716.0, "step": 261 }, { "epoch": 0.08943505717699266, "grad_norm": 0.36018423087520973, "learning_rate": 1.484641638225256e-05, "loss": 0.3973, "num_tokens": 49685741.0, "step": 262 }, { "epoch": 0.08977641235705752, "grad_norm": 0.31848512078325625, "learning_rate": 1.49032992036405e-05, "loss": 0.363, "num_tokens": 49844991.0, "step": 263 }, { "epoch": 0.09011776753712238, "grad_norm": 0.3098833128111047, "learning_rate": 1.4960182025028442e-05, "loss": 0.3239, "num_tokens": 50027611.0, "step": 264 }, { "epoch": 0.09045912271718723, "grad_norm": 0.31624073625568994, "learning_rate": 1.5017064846416384e-05, "loss": 0.3682, "num_tokens": 50217704.0, "step": 265 }, { "epoch": 0.0908004778972521, "grad_norm": 0.3001353691216866, "learning_rate": 1.5073947667804325e-05, "loss": 0.3308, "num_tokens": 50395437.0, "step": 266 }, { "epoch": 0.09114183307731695, "grad_norm": 0.32450635661716143, "learning_rate": 1.5130830489192265e-05, "loss": 0.337, "num_tokens": 50556967.0, "step": 267 }, { "epoch": 0.0914831882573818, "grad_norm": 0.30328927053331506, "learning_rate": 1.5187713310580206e-05, "loss": 0.3788, "num_tokens": 50738209.0, "step": 268 }, { "epoch": 0.09182454343744666, "grad_norm": 0.3294983290797316, "learning_rate": 1.5244596131968147e-05, "loss": 0.3767, "num_tokens": 50945554.0, "step": 269 }, { "epoch": 0.09216589861751152, "grad_norm": 0.33006774557290286, "learning_rate": 1.530147895335609e-05, "loss": 0.3695, "num_tokens": 51118796.0, "step": 270 }, { "epoch": 0.09250725379757638, "grad_norm": 0.31764493799909627, "learning_rate": 1.535836177474403e-05, "loss": 0.3503, "num_tokens": 51320627.0, "step": 271 }, { "epoch": 0.09284860897764123, "grad_norm": 0.3230829647702297, "learning_rate": 1.541524459613197e-05, "loss": 0.3846, "num_tokens": 51517960.0, "step": 272 }, { "epoch": 0.0931899641577061, "grad_norm": 0.32974571043984685, "learning_rate": 1.547212741751991e-05, "loss": 0.3639, "num_tokens": 51715211.0, "step": 273 }, { "epoch": 0.09353131933777095, "grad_norm": 0.31526925560937435, "learning_rate": 1.5529010238907852e-05, "loss": 0.3566, "num_tokens": 51889501.0, "step": 274 }, { "epoch": 0.0938726745178358, "grad_norm": 0.3599440733533943, "learning_rate": 1.5585893060295793e-05, "loss": 0.3775, "num_tokens": 52049532.0, "step": 275 }, { "epoch": 0.09421402969790067, "grad_norm": 0.3090429099965421, "learning_rate": 1.5642775881683734e-05, "loss": 0.3633, "num_tokens": 52202438.0, "step": 276 }, { "epoch": 0.09455538487796553, "grad_norm": 0.3462179833804099, "learning_rate": 1.5699658703071675e-05, "loss": 0.3815, "num_tokens": 52385373.0, "step": 277 }, { "epoch": 0.09489674005803038, "grad_norm": 0.3100803204910945, "learning_rate": 1.5756541524459615e-05, "loss": 0.3652, "num_tokens": 52565887.0, "step": 278 }, { "epoch": 0.09523809523809523, "grad_norm": 0.31186769090258554, "learning_rate": 1.5813424345847556e-05, "loss": 0.3979, "num_tokens": 52775148.0, "step": 279 }, { "epoch": 0.0955794504181601, "grad_norm": 0.3574147093920914, "learning_rate": 1.5870307167235497e-05, "loss": 0.3605, "num_tokens": 52936001.0, "step": 280 }, { "epoch": 0.09592080559822495, "grad_norm": 0.2838379432794604, "learning_rate": 1.5927189988623438e-05, "loss": 0.3298, "num_tokens": 53137811.0, "step": 281 }, { "epoch": 0.09626216077828981, "grad_norm": 0.30441569740759256, "learning_rate": 1.598407281001138e-05, "loss": 0.3474, "num_tokens": 53316695.0, "step": 282 }, { "epoch": 0.09660351595835467, "grad_norm": 0.30416821216609335, "learning_rate": 1.604095563139932e-05, "loss": 0.377, "num_tokens": 53535323.0, "step": 283 }, { "epoch": 0.09694487113841953, "grad_norm": 0.37620286140990544, "learning_rate": 1.609783845278726e-05, "loss": 0.3613, "num_tokens": 53699921.0, "step": 284 }, { "epoch": 0.09728622631848438, "grad_norm": 0.30760281033774894, "learning_rate": 1.61547212741752e-05, "loss": 0.375, "num_tokens": 53936033.0, "step": 285 }, { "epoch": 0.09762758149854923, "grad_norm": 0.3006662206415984, "learning_rate": 1.621160409556314e-05, "loss": 0.3407, "num_tokens": 54122310.0, "step": 286 }, { "epoch": 0.0979689366786141, "grad_norm": 0.31230275046495864, "learning_rate": 1.6268486916951082e-05, "loss": 0.3727, "num_tokens": 54306182.0, "step": 287 }, { "epoch": 0.09831029185867896, "grad_norm": 0.3294239372066118, "learning_rate": 1.6325369738339023e-05, "loss": 0.3561, "num_tokens": 54469530.0, "step": 288 }, { "epoch": 0.09865164703874381, "grad_norm": 0.27865312583429785, "learning_rate": 1.6382252559726964e-05, "loss": 0.352, "num_tokens": 54686360.0, "step": 289 }, { "epoch": 0.09899300221880868, "grad_norm": 0.3311212159647177, "learning_rate": 1.6439135381114904e-05, "loss": 0.3708, "num_tokens": 54843803.0, "step": 290 }, { "epoch": 0.09933435739887353, "grad_norm": 0.2862480584964377, "learning_rate": 1.6496018202502845e-05, "loss": 0.3354, "num_tokens": 55033767.0, "step": 291 }, { "epoch": 0.09967571257893838, "grad_norm": 0.2846304438442411, "learning_rate": 1.6552901023890786e-05, "loss": 0.3511, "num_tokens": 55233347.0, "step": 292 }, { "epoch": 0.10001706775900324, "grad_norm": 0.30522894561279323, "learning_rate": 1.6609783845278727e-05, "loss": 0.366, "num_tokens": 55408527.0, "step": 293 }, { "epoch": 0.1003584229390681, "grad_norm": 0.32039536273335706, "learning_rate": 1.6666666666666667e-05, "loss": 0.3853, "num_tokens": 55598270.0, "step": 294 }, { "epoch": 0.10069977811913296, "grad_norm": 0.2960219243519848, "learning_rate": 1.6723549488054608e-05, "loss": 0.3636, "num_tokens": 55815843.0, "step": 295 }, { "epoch": 0.10104113329919781, "grad_norm": 0.33085468917565836, "learning_rate": 1.678043230944255e-05, "loss": 0.3976, "num_tokens": 56002044.0, "step": 296 }, { "epoch": 0.10138248847926268, "grad_norm": 0.2940156832921654, "learning_rate": 1.683731513083049e-05, "loss": 0.366, "num_tokens": 56246098.0, "step": 297 }, { "epoch": 0.10172384365932753, "grad_norm": 0.31017306766856056, "learning_rate": 1.689419795221843e-05, "loss": 0.3641, "num_tokens": 56432720.0, "step": 298 }, { "epoch": 0.10206519883939238, "grad_norm": 0.2957236336558316, "learning_rate": 1.695108077360637e-05, "loss": 0.3761, "num_tokens": 56639668.0, "step": 299 }, { "epoch": 0.10240655401945725, "grad_norm": 0.3495029898058031, "learning_rate": 1.7007963594994315e-05, "loss": 0.388, "num_tokens": 56831805.0, "step": 300 }, { "epoch": 0.1027479091995221, "grad_norm": 0.3289294212562765, "learning_rate": 1.7064846416382256e-05, "loss": 0.3428, "num_tokens": 57020689.0, "step": 301 }, { "epoch": 0.10308926437958696, "grad_norm": 0.31010438712242416, "learning_rate": 1.7121729237770197e-05, "loss": 0.3591, "num_tokens": 57204579.0, "step": 302 }, { "epoch": 0.10343061955965181, "grad_norm": 0.30736213161387854, "learning_rate": 1.7178612059158138e-05, "loss": 0.327, "num_tokens": 57398106.0, "step": 303 }, { "epoch": 0.10377197473971668, "grad_norm": 0.32183124651584766, "learning_rate": 1.723549488054608e-05, "loss": 0.3538, "num_tokens": 57576203.0, "step": 304 }, { "epoch": 0.10411332991978153, "grad_norm": 0.29683711576258176, "learning_rate": 1.729237770193402e-05, "loss": 0.3363, "num_tokens": 57768600.0, "step": 305 }, { "epoch": 0.10445468509984639, "grad_norm": 0.30131631470479797, "learning_rate": 1.734926052332196e-05, "loss": 0.3694, "num_tokens": 57973409.0, "step": 306 }, { "epoch": 0.10479604027991125, "grad_norm": 0.30516196992364414, "learning_rate": 1.74061433447099e-05, "loss": 0.3487, "num_tokens": 58155797.0, "step": 307 }, { "epoch": 0.10513739545997611, "grad_norm": 0.3225002837602171, "learning_rate": 1.746302616609784e-05, "loss": 0.3655, "num_tokens": 58350267.0, "step": 308 }, { "epoch": 0.10547875064004096, "grad_norm": 0.3192723838917171, "learning_rate": 1.7519908987485782e-05, "loss": 0.3269, "num_tokens": 58497721.0, "step": 309 }, { "epoch": 0.10582010582010581, "grad_norm": 0.32345376344465565, "learning_rate": 1.7576791808873723e-05, "loss": 0.3733, "num_tokens": 58674559.0, "step": 310 }, { "epoch": 0.10616146100017068, "grad_norm": 0.31785748642549255, "learning_rate": 1.7633674630261664e-05, "loss": 0.3871, "num_tokens": 58861638.0, "step": 311 }, { "epoch": 0.10650281618023553, "grad_norm": 0.30537776970268643, "learning_rate": 1.7690557451649604e-05, "loss": 0.3379, "num_tokens": 59061166.0, "step": 312 }, { "epoch": 0.10684417136030039, "grad_norm": 0.3463830324233406, "learning_rate": 1.7747440273037545e-05, "loss": 0.3496, "num_tokens": 59231345.0, "step": 313 }, { "epoch": 0.10718552654036526, "grad_norm": 0.31580113222636835, "learning_rate": 1.7804323094425486e-05, "loss": 0.3408, "num_tokens": 59413588.0, "step": 314 }, { "epoch": 0.10752688172043011, "grad_norm": 0.3074928166126656, "learning_rate": 1.7861205915813427e-05, "loss": 0.3586, "num_tokens": 59589430.0, "step": 315 }, { "epoch": 0.10786823690049496, "grad_norm": 0.3111124667269044, "learning_rate": 1.7918088737201367e-05, "loss": 0.3411, "num_tokens": 59803969.0, "step": 316 }, { "epoch": 0.10820959208055982, "grad_norm": 0.33501291541071726, "learning_rate": 1.7974971558589308e-05, "loss": 0.3809, "num_tokens": 60003626.0, "step": 317 }, { "epoch": 0.10855094726062468, "grad_norm": 0.35327613204059405, "learning_rate": 1.803185437997725e-05, "loss": 0.3936, "num_tokens": 60201657.0, "step": 318 }, { "epoch": 0.10889230244068954, "grad_norm": 0.30959885752554817, "learning_rate": 1.808873720136519e-05, "loss": 0.3713, "num_tokens": 60367964.0, "step": 319 }, { "epoch": 0.10923365762075439, "grad_norm": 0.30569943608022887, "learning_rate": 1.814562002275313e-05, "loss": 0.3568, "num_tokens": 60563916.0, "step": 320 }, { "epoch": 0.10957501280081926, "grad_norm": 0.37772846298875173, "learning_rate": 1.820250284414107e-05, "loss": 0.3443, "num_tokens": 60740652.0, "step": 321 }, { "epoch": 0.10991636798088411, "grad_norm": 0.31655194467013, "learning_rate": 1.8259385665529012e-05, "loss": 0.3337, "num_tokens": 60910719.0, "step": 322 }, { "epoch": 0.11025772316094896, "grad_norm": 0.2989777178131285, "learning_rate": 1.8316268486916953e-05, "loss": 0.3457, "num_tokens": 61115893.0, "step": 323 }, { "epoch": 0.11059907834101383, "grad_norm": 0.44381703561694696, "learning_rate": 1.8373151308304893e-05, "loss": 0.3667, "num_tokens": 61329088.0, "step": 324 }, { "epoch": 0.11094043352107869, "grad_norm": 0.319358975301407, "learning_rate": 1.8430034129692834e-05, "loss": 0.3595, "num_tokens": 61540617.0, "step": 325 }, { "epoch": 0.11128178870114354, "grad_norm": 0.2853417842841346, "learning_rate": 1.8486916951080775e-05, "loss": 0.3233, "num_tokens": 61738322.0, "step": 326 }, { "epoch": 0.11162314388120839, "grad_norm": 0.3313189803688217, "learning_rate": 1.8543799772468716e-05, "loss": 0.3339, "num_tokens": 61928466.0, "step": 327 }, { "epoch": 0.11196449906127326, "grad_norm": 0.32567971530748707, "learning_rate": 1.8600682593856656e-05, "loss": 0.3625, "num_tokens": 62190468.0, "step": 328 }, { "epoch": 0.11230585424133811, "grad_norm": 0.3078639099233346, "learning_rate": 1.8657565415244597e-05, "loss": 0.3483, "num_tokens": 62350711.0, "step": 329 }, { "epoch": 0.11264720942140297, "grad_norm": 0.3034513307041438, "learning_rate": 1.8714448236632538e-05, "loss": 0.355, "num_tokens": 62573566.0, "step": 330 }, { "epoch": 0.11298856460146783, "grad_norm": 0.3531415879384215, "learning_rate": 1.877133105802048e-05, "loss": 0.3693, "num_tokens": 62791972.0, "step": 331 }, { "epoch": 0.11332991978153269, "grad_norm": 0.30317771015885275, "learning_rate": 1.882821387940842e-05, "loss": 0.3688, "num_tokens": 62999698.0, "step": 332 }, { "epoch": 0.11367127496159754, "grad_norm": 0.31924288865800116, "learning_rate": 1.888509670079636e-05, "loss": 0.382, "num_tokens": 63187150.0, "step": 333 }, { "epoch": 0.1140126301416624, "grad_norm": 0.3109289552354261, "learning_rate": 1.89419795221843e-05, "loss": 0.3432, "num_tokens": 63377239.0, "step": 334 }, { "epoch": 0.11435398532172726, "grad_norm": 0.3096576201193035, "learning_rate": 1.899886234357224e-05, "loss": 0.3589, "num_tokens": 63555030.0, "step": 335 }, { "epoch": 0.11469534050179211, "grad_norm": 0.29616137170349727, "learning_rate": 1.9055745164960186e-05, "loss": 0.326, "num_tokens": 63729859.0, "step": 336 }, { "epoch": 0.11503669568185697, "grad_norm": 0.3095974387247403, "learning_rate": 1.9112627986348127e-05, "loss": 0.3716, "num_tokens": 63927555.0, "step": 337 }, { "epoch": 0.11537805086192184, "grad_norm": 0.30681376907615565, "learning_rate": 1.9169510807736067e-05, "loss": 0.3491, "num_tokens": 64111303.0, "step": 338 }, { "epoch": 0.11571940604198669, "grad_norm": 0.308905637436516, "learning_rate": 1.9226393629124008e-05, "loss": 0.3567, "num_tokens": 64284510.0, "step": 339 }, { "epoch": 0.11606076122205154, "grad_norm": 0.29862890669731357, "learning_rate": 1.928327645051195e-05, "loss": 0.371, "num_tokens": 64474007.0, "step": 340 }, { "epoch": 0.1164021164021164, "grad_norm": 0.2925525652699544, "learning_rate": 1.934015927189989e-05, "loss": 0.3189, "num_tokens": 64669920.0, "step": 341 }, { "epoch": 0.11674347158218126, "grad_norm": 0.31131175678222567, "learning_rate": 1.939704209328783e-05, "loss": 0.3951, "num_tokens": 64867394.0, "step": 342 }, { "epoch": 0.11708482676224612, "grad_norm": 0.3159487525400559, "learning_rate": 1.945392491467577e-05, "loss": 0.3694, "num_tokens": 65053754.0, "step": 343 }, { "epoch": 0.11742618194231097, "grad_norm": 0.2867972198817954, "learning_rate": 1.9510807736063712e-05, "loss": 0.3445, "num_tokens": 65266231.0, "step": 344 }, { "epoch": 0.11776753712237584, "grad_norm": 0.31045214021681816, "learning_rate": 1.9567690557451653e-05, "loss": 0.3743, "num_tokens": 65473438.0, "step": 345 }, { "epoch": 0.11810889230244069, "grad_norm": 0.3006535354295515, "learning_rate": 1.9624573378839593e-05, "loss": 0.3553, "num_tokens": 65636119.0, "step": 346 }, { "epoch": 0.11845024748250554, "grad_norm": 0.3339269838137356, "learning_rate": 1.9681456200227534e-05, "loss": 0.3839, "num_tokens": 65806232.0, "step": 347 }, { "epoch": 0.11879160266257041, "grad_norm": 0.325399277482181, "learning_rate": 1.9738339021615475e-05, "loss": 0.3704, "num_tokens": 66001641.0, "step": 348 }, { "epoch": 0.11913295784263526, "grad_norm": 0.3276381686499699, "learning_rate": 1.9795221843003416e-05, "loss": 0.364, "num_tokens": 66172099.0, "step": 349 }, { "epoch": 0.11947431302270012, "grad_norm": 0.2852768995014164, "learning_rate": 1.9852104664391356e-05, "loss": 0.3515, "num_tokens": 66393658.0, "step": 350 }, { "epoch": 0.11981566820276497, "grad_norm": 0.3237055033220795, "learning_rate": 1.9908987485779297e-05, "loss": 0.3506, "num_tokens": 66555568.0, "step": 351 }, { "epoch": 0.12015702338282984, "grad_norm": 0.3131901677717563, "learning_rate": 1.9965870307167238e-05, "loss": 0.3882, "num_tokens": 66749343.0, "step": 352 }, { "epoch": 0.12049837856289469, "grad_norm": 0.33061988106372947, "learning_rate": 2.002275312855518e-05, "loss": 0.3849, "num_tokens": 66941459.0, "step": 353 }, { "epoch": 0.12083973374295955, "grad_norm": 0.3048368256487305, "learning_rate": 2.007963594994312e-05, "loss": 0.3693, "num_tokens": 67177592.0, "step": 354 }, { "epoch": 0.12118108892302441, "grad_norm": 0.3111109406212754, "learning_rate": 2.013651877133106e-05, "loss": 0.3901, "num_tokens": 67373767.0, "step": 355 }, { "epoch": 0.12152244410308927, "grad_norm": 0.31035130672709527, "learning_rate": 2.0193401592719e-05, "loss": 0.3591, "num_tokens": 67564086.0, "step": 356 }, { "epoch": 0.12186379928315412, "grad_norm": 0.31877653276895773, "learning_rate": 2.025028441410694e-05, "loss": 0.3692, "num_tokens": 67752729.0, "step": 357 }, { "epoch": 0.12220515446321897, "grad_norm": 0.35867824412097543, "learning_rate": 2.0307167235494882e-05, "loss": 0.3712, "num_tokens": 67957156.0, "step": 358 }, { "epoch": 0.12254650964328384, "grad_norm": 0.30394693459567496, "learning_rate": 2.0364050056882823e-05, "loss": 0.3477, "num_tokens": 68142790.0, "step": 359 }, { "epoch": 0.1228878648233487, "grad_norm": 0.31858266609673536, "learning_rate": 2.0420932878270764e-05, "loss": 0.3699, "num_tokens": 68330845.0, "step": 360 }, { "epoch": 0.12322922000341355, "grad_norm": 0.349443316500169, "learning_rate": 2.0477815699658705e-05, "loss": 0.4005, "num_tokens": 68535820.0, "step": 361 }, { "epoch": 0.12357057518347841, "grad_norm": 0.3349166468722116, "learning_rate": 2.0534698521046645e-05, "loss": 0.3894, "num_tokens": 68738817.0, "step": 362 }, { "epoch": 0.12391193036354327, "grad_norm": 0.3238842256141324, "learning_rate": 2.0591581342434586e-05, "loss": 0.3635, "num_tokens": 68931838.0, "step": 363 }, { "epoch": 0.12425328554360812, "grad_norm": 0.2768729871967442, "learning_rate": 2.0648464163822527e-05, "loss": 0.3209, "num_tokens": 69125725.0, "step": 364 }, { "epoch": 0.12459464072367298, "grad_norm": 0.3118846835432812, "learning_rate": 2.0705346985210468e-05, "loss": 0.366, "num_tokens": 69345645.0, "step": 365 }, { "epoch": 0.12493599590373784, "grad_norm": 0.3457369953513765, "learning_rate": 2.076222980659841e-05, "loss": 0.3788, "num_tokens": 69522580.0, "step": 366 }, { "epoch": 0.1252773510838027, "grad_norm": 0.30830978330316156, "learning_rate": 2.081911262798635e-05, "loss": 0.3641, "num_tokens": 69722270.0, "step": 367 }, { "epoch": 0.12561870626386756, "grad_norm": 0.3293599634474066, "learning_rate": 2.087599544937429e-05, "loss": 0.352, "num_tokens": 69891074.0, "step": 368 }, { "epoch": 0.1259600614439324, "grad_norm": 0.34195152927494427, "learning_rate": 2.093287827076223e-05, "loss": 0.3922, "num_tokens": 70058353.0, "step": 369 }, { "epoch": 0.12630141662399727, "grad_norm": 0.308153256763274, "learning_rate": 2.098976109215017e-05, "loss": 0.3263, "num_tokens": 70231603.0, "step": 370 }, { "epoch": 0.12664277180406214, "grad_norm": 0.3270050241633583, "learning_rate": 2.1046643913538112e-05, "loss": 0.3662, "num_tokens": 70419324.0, "step": 371 }, { "epoch": 0.12698412698412698, "grad_norm": 0.3132696587628713, "learning_rate": 2.1103526734926053e-05, "loss": 0.3822, "num_tokens": 70631778.0, "step": 372 }, { "epoch": 0.12732548216419184, "grad_norm": 0.33846753129541984, "learning_rate": 2.1160409556313997e-05, "loss": 0.3485, "num_tokens": 70801535.0, "step": 373 }, { "epoch": 0.1276668373442567, "grad_norm": 0.3244007013078995, "learning_rate": 2.1217292377701938e-05, "loss": 0.399, "num_tokens": 71004568.0, "step": 374 }, { "epoch": 0.12800819252432155, "grad_norm": 0.31207858428302665, "learning_rate": 2.127417519908988e-05, "loss": 0.3963, "num_tokens": 71195998.0, "step": 375 }, { "epoch": 0.12834954770438642, "grad_norm": 0.31600828796610647, "learning_rate": 2.133105802047782e-05, "loss": 0.3774, "num_tokens": 71368876.0, "step": 376 }, { "epoch": 0.12869090288445126, "grad_norm": 0.35231921337394234, "learning_rate": 2.138794084186576e-05, "loss": 0.374, "num_tokens": 71528212.0, "step": 377 }, { "epoch": 0.12903225806451613, "grad_norm": 0.2882210694907114, "learning_rate": 2.14448236632537e-05, "loss": 0.3449, "num_tokens": 71763351.0, "step": 378 }, { "epoch": 0.129373613244581, "grad_norm": 0.28684002334971664, "learning_rate": 2.150170648464164e-05, "loss": 0.3404, "num_tokens": 71960673.0, "step": 379 }, { "epoch": 0.12971496842464583, "grad_norm": 0.3149834037457891, "learning_rate": 2.1558589306029582e-05, "loss": 0.3604, "num_tokens": 72145596.0, "step": 380 }, { "epoch": 0.1300563236047107, "grad_norm": 0.29219707524949107, "learning_rate": 2.1615472127417523e-05, "loss": 0.351, "num_tokens": 72363299.0, "step": 381 }, { "epoch": 0.13039767878477557, "grad_norm": 0.2881147799743381, "learning_rate": 2.1672354948805464e-05, "loss": 0.3326, "num_tokens": 72553229.0, "step": 382 }, { "epoch": 0.1307390339648404, "grad_norm": 0.2916957780061931, "learning_rate": 2.1729237770193405e-05, "loss": 0.3463, "num_tokens": 72726842.0, "step": 383 }, { "epoch": 0.13108038914490527, "grad_norm": 0.29789337011093914, "learning_rate": 2.1786120591581345e-05, "loss": 0.3405, "num_tokens": 72920680.0, "step": 384 }, { "epoch": 0.13142174432497014, "grad_norm": 0.2964770207269157, "learning_rate": 2.1843003412969286e-05, "loss": 0.3571, "num_tokens": 73132917.0, "step": 385 }, { "epoch": 0.13176309950503498, "grad_norm": 0.2968083327566618, "learning_rate": 2.1899886234357227e-05, "loss": 0.3748, "num_tokens": 73334957.0, "step": 386 }, { "epoch": 0.13210445468509985, "grad_norm": 0.2744264978541339, "learning_rate": 2.1956769055745168e-05, "loss": 0.3883, "num_tokens": 73587419.0, "step": 387 }, { "epoch": 0.13244580986516472, "grad_norm": 0.2866806489503943, "learning_rate": 2.201365187713311e-05, "loss": 0.3439, "num_tokens": 73771172.0, "step": 388 }, { "epoch": 0.13278716504522955, "grad_norm": 0.3140343154117599, "learning_rate": 2.207053469852105e-05, "loss": 0.3684, "num_tokens": 73939108.0, "step": 389 }, { "epoch": 0.13312852022529442, "grad_norm": 0.288596484182983, "learning_rate": 2.212741751990899e-05, "loss": 0.3203, "num_tokens": 74110440.0, "step": 390 }, { "epoch": 0.1334698754053593, "grad_norm": 0.2955210722271284, "learning_rate": 2.218430034129693e-05, "loss": 0.3587, "num_tokens": 74312957.0, "step": 391 }, { "epoch": 0.13381123058542413, "grad_norm": 0.30998719270530023, "learning_rate": 2.224118316268487e-05, "loss": 0.363, "num_tokens": 74515723.0, "step": 392 }, { "epoch": 0.134152585765489, "grad_norm": 0.29473054751952654, "learning_rate": 2.2298065984072812e-05, "loss": 0.3574, "num_tokens": 74714871.0, "step": 393 }, { "epoch": 0.13449394094555384, "grad_norm": 0.31306389439128063, "learning_rate": 2.2354948805460753e-05, "loss": 0.367, "num_tokens": 74912748.0, "step": 394 }, { "epoch": 0.1348352961256187, "grad_norm": 0.33931110567645434, "learning_rate": 2.2411831626848694e-05, "loss": 0.3832, "num_tokens": 75091348.0, "step": 395 }, { "epoch": 0.13517665130568357, "grad_norm": 0.296897357008964, "learning_rate": 2.2468714448236634e-05, "loss": 0.3659, "num_tokens": 75280844.0, "step": 396 }, { "epoch": 0.1355180064857484, "grad_norm": 0.32363794120527445, "learning_rate": 2.2525597269624575e-05, "loss": 0.4053, "num_tokens": 75465971.0, "step": 397 }, { "epoch": 0.13585936166581328, "grad_norm": 0.353422226694476, "learning_rate": 2.2582480091012516e-05, "loss": 0.3564, "num_tokens": 75664486.0, "step": 398 }, { "epoch": 0.13620071684587814, "grad_norm": 0.2918464987470416, "learning_rate": 2.2639362912400457e-05, "loss": 0.3333, "num_tokens": 75858563.0, "step": 399 }, { "epoch": 0.13654207202594298, "grad_norm": 0.3401777857160627, "learning_rate": 2.2696245733788397e-05, "loss": 0.3842, "num_tokens": 76045063.0, "step": 400 }, { "epoch": 0.13688342720600785, "grad_norm": 0.29564389573887506, "learning_rate": 2.2753128555176338e-05, "loss": 0.3699, "num_tokens": 76265401.0, "step": 401 }, { "epoch": 0.13722478238607272, "grad_norm": 0.301614755325945, "learning_rate": 2.281001137656428e-05, "loss": 0.3254, "num_tokens": 76458263.0, "step": 402 }, { "epoch": 0.13756613756613756, "grad_norm": 0.3149213712811895, "learning_rate": 2.286689419795222e-05, "loss": 0.368, "num_tokens": 76647167.0, "step": 403 }, { "epoch": 0.13790749274620243, "grad_norm": 0.3072798196565177, "learning_rate": 2.292377701934016e-05, "loss": 0.3488, "num_tokens": 76836414.0, "step": 404 }, { "epoch": 0.1382488479262673, "grad_norm": 0.3224800360300367, "learning_rate": 2.29806598407281e-05, "loss": 0.3848, "num_tokens": 77015572.0, "step": 405 }, { "epoch": 0.13859020310633213, "grad_norm": 0.295435981201994, "learning_rate": 2.3037542662116042e-05, "loss": 0.3504, "num_tokens": 77205651.0, "step": 406 }, { "epoch": 0.138931558286397, "grad_norm": 0.32497711655939, "learning_rate": 2.3094425483503983e-05, "loss": 0.3755, "num_tokens": 77417738.0, "step": 407 }, { "epoch": 0.13927291346646187, "grad_norm": 0.3323212282530298, "learning_rate": 2.3151308304891923e-05, "loss": 0.3825, "num_tokens": 77610774.0, "step": 408 }, { "epoch": 0.1396142686465267, "grad_norm": 0.2674465263172731, "learning_rate": 2.3208191126279864e-05, "loss": 0.3529, "num_tokens": 77850075.0, "step": 409 }, { "epoch": 0.13995562382659157, "grad_norm": 0.3389497185383758, "learning_rate": 2.3265073947667805e-05, "loss": 0.3501, "num_tokens": 78066961.0, "step": 410 }, { "epoch": 0.1402969790066564, "grad_norm": 0.29875092650442275, "learning_rate": 2.3321956769055746e-05, "loss": 0.3993, "num_tokens": 78269897.0, "step": 411 }, { "epoch": 0.14063833418672128, "grad_norm": 0.3183067888434198, "learning_rate": 2.3378839590443686e-05, "loss": 0.3581, "num_tokens": 78465238.0, "step": 412 }, { "epoch": 0.14097968936678615, "grad_norm": 0.3152716217584142, "learning_rate": 2.3435722411831627e-05, "loss": 0.3658, "num_tokens": 78676931.0, "step": 413 }, { "epoch": 0.141321044546851, "grad_norm": 0.3026540156340894, "learning_rate": 2.3492605233219568e-05, "loss": 0.355, "num_tokens": 78850772.0, "step": 414 }, { "epoch": 0.14166239972691586, "grad_norm": 0.30222681512441524, "learning_rate": 2.354948805460751e-05, "loss": 0.3761, "num_tokens": 79048781.0, "step": 415 }, { "epoch": 0.14200375490698072, "grad_norm": 0.302595742465988, "learning_rate": 2.360637087599545e-05, "loss": 0.3458, "num_tokens": 79261110.0, "step": 416 }, { "epoch": 0.14234511008704556, "grad_norm": 0.28160555533211934, "learning_rate": 2.366325369738339e-05, "loss": 0.3282, "num_tokens": 79443004.0, "step": 417 }, { "epoch": 0.14268646526711043, "grad_norm": 0.34786683324421214, "learning_rate": 2.3720136518771334e-05, "loss": 0.3929, "num_tokens": 79657742.0, "step": 418 }, { "epoch": 0.1430278204471753, "grad_norm": 0.3479593553109153, "learning_rate": 2.3777019340159275e-05, "loss": 0.3642, "num_tokens": 79796155.0, "step": 419 }, { "epoch": 0.14336917562724014, "grad_norm": 0.28535438288873777, "learning_rate": 2.3833902161547216e-05, "loss": 0.3784, "num_tokens": 80056073.0, "step": 420 }, { "epoch": 0.143710530807305, "grad_norm": 0.30086629382470015, "learning_rate": 2.3890784982935157e-05, "loss": 0.3414, "num_tokens": 80224088.0, "step": 421 }, { "epoch": 0.14405188598736987, "grad_norm": 0.32914246904753774, "learning_rate": 2.3947667804323097e-05, "loss": 0.3663, "num_tokens": 80410358.0, "step": 422 }, { "epoch": 0.1443932411674347, "grad_norm": 0.29785534436194716, "learning_rate": 2.4004550625711038e-05, "loss": 0.3659, "num_tokens": 80605305.0, "step": 423 }, { "epoch": 0.14473459634749958, "grad_norm": 0.3235311056974496, "learning_rate": 2.406143344709898e-05, "loss": 0.3418, "num_tokens": 80790027.0, "step": 424 }, { "epoch": 0.14507595152756442, "grad_norm": 0.3090740044593302, "learning_rate": 2.411831626848692e-05, "loss": 0.376, "num_tokens": 81014150.0, "step": 425 }, { "epoch": 0.14541730670762928, "grad_norm": 0.3068194772356789, "learning_rate": 2.417519908987486e-05, "loss": 0.3574, "num_tokens": 81187451.0, "step": 426 }, { "epoch": 0.14575866188769415, "grad_norm": 0.45195780356636434, "learning_rate": 2.42320819112628e-05, "loss": 0.3736, "num_tokens": 81362656.0, "step": 427 }, { "epoch": 0.146100017067759, "grad_norm": 0.378837251787929, "learning_rate": 2.4288964732650742e-05, "loss": 0.3515, "num_tokens": 81519976.0, "step": 428 }, { "epoch": 0.14644137224782386, "grad_norm": 0.31220917946072807, "learning_rate": 2.4345847554038683e-05, "loss": 0.3553, "num_tokens": 81701365.0, "step": 429 }, { "epoch": 0.14678272742788873, "grad_norm": 0.3406236258839342, "learning_rate": 2.4402730375426623e-05, "loss": 0.3887, "num_tokens": 81877321.0, "step": 430 }, { "epoch": 0.14712408260795357, "grad_norm": 0.3051977908036161, "learning_rate": 2.4459613196814564e-05, "loss": 0.3629, "num_tokens": 82065983.0, "step": 431 }, { "epoch": 0.14746543778801843, "grad_norm": 0.2947888660183732, "learning_rate": 2.4516496018202505e-05, "loss": 0.3852, "num_tokens": 82261231.0, "step": 432 }, { "epoch": 0.1478067929680833, "grad_norm": 0.32575415529364166, "learning_rate": 2.4573378839590446e-05, "loss": 0.3819, "num_tokens": 82447680.0, "step": 433 }, { "epoch": 0.14814814814814814, "grad_norm": 0.2991760367817417, "learning_rate": 2.4630261660978386e-05, "loss": 0.3866, "num_tokens": 82680188.0, "step": 434 }, { "epoch": 0.148489503328213, "grad_norm": 0.2857359156619635, "learning_rate": 2.4687144482366327e-05, "loss": 0.3343, "num_tokens": 82882784.0, "step": 435 }, { "epoch": 0.14883085850827787, "grad_norm": 0.3339100016673958, "learning_rate": 2.4744027303754268e-05, "loss": 0.3787, "num_tokens": 83051734.0, "step": 436 }, { "epoch": 0.14917221368834271, "grad_norm": 0.39036176155277447, "learning_rate": 2.480091012514221e-05, "loss": 0.3783, "num_tokens": 83236387.0, "step": 437 }, { "epoch": 0.14951356886840758, "grad_norm": 0.30738224496932876, "learning_rate": 2.485779294653015e-05, "loss": 0.3911, "num_tokens": 83438794.0, "step": 438 }, { "epoch": 0.14985492404847245, "grad_norm": 0.30624342701277474, "learning_rate": 2.491467576791809e-05, "loss": 0.3473, "num_tokens": 83643729.0, "step": 439 }, { "epoch": 0.1501962792285373, "grad_norm": 0.3112844027012334, "learning_rate": 2.497155858930603e-05, "loss": 0.3457, "num_tokens": 83800797.0, "step": 440 }, { "epoch": 0.15053763440860216, "grad_norm": 0.36313109755013195, "learning_rate": 2.502844141069397e-05, "loss": 0.3662, "num_tokens": 83981806.0, "step": 441 }, { "epoch": 0.150878989588667, "grad_norm": 0.29135466171770485, "learning_rate": 2.5085324232081912e-05, "loss": 0.3236, "num_tokens": 84145659.0, "step": 442 }, { "epoch": 0.15122034476873186, "grad_norm": 0.33870487862337006, "learning_rate": 2.5142207053469853e-05, "loss": 0.3977, "num_tokens": 84308945.0, "step": 443 }, { "epoch": 0.15156169994879673, "grad_norm": 0.32717750291928643, "learning_rate": 2.5199089874857794e-05, "loss": 0.3549, "num_tokens": 84463813.0, "step": 444 }, { "epoch": 0.15190305512886157, "grad_norm": 0.33155423381318355, "learning_rate": 2.5255972696245735e-05, "loss": 0.3578, "num_tokens": 84644583.0, "step": 445 }, { "epoch": 0.15224441030892644, "grad_norm": 0.291842877839142, "learning_rate": 2.5312855517633675e-05, "loss": 0.337, "num_tokens": 84840484.0, "step": 446 }, { "epoch": 0.1525857654889913, "grad_norm": 0.3220224593737292, "learning_rate": 2.5369738339021616e-05, "loss": 0.3549, "num_tokens": 85039065.0, "step": 447 }, { "epoch": 0.15292712066905614, "grad_norm": 0.30578367922667826, "learning_rate": 2.5426621160409557e-05, "loss": 0.3708, "num_tokens": 85234075.0, "step": 448 }, { "epoch": 0.153268475849121, "grad_norm": 0.398688192031405, "learning_rate": 2.5483503981797498e-05, "loss": 0.3835, "num_tokens": 85405969.0, "step": 449 }, { "epoch": 0.15360983102918588, "grad_norm": 0.31191093781749024, "learning_rate": 2.554038680318544e-05, "loss": 0.3471, "num_tokens": 85577937.0, "step": 450 }, { "epoch": 0.15395118620925072, "grad_norm": 0.3151425531275972, "learning_rate": 2.559726962457338e-05, "loss": 0.3684, "num_tokens": 85763631.0, "step": 451 }, { "epoch": 0.15429254138931559, "grad_norm": 0.317456464860903, "learning_rate": 2.565415244596132e-05, "loss": 0.4061, "num_tokens": 85990459.0, "step": 452 }, { "epoch": 0.15463389656938045, "grad_norm": 0.3154649191801666, "learning_rate": 2.571103526734926e-05, "loss": 0.4147, "num_tokens": 86204673.0, "step": 453 }, { "epoch": 0.1549752517494453, "grad_norm": 0.3042753791334801, "learning_rate": 2.57679180887372e-05, "loss": 0.3874, "num_tokens": 86389812.0, "step": 454 }, { "epoch": 0.15531660692951016, "grad_norm": 0.3251313411292726, "learning_rate": 2.5824800910125142e-05, "loss": 0.3849, "num_tokens": 86622067.0, "step": 455 }, { "epoch": 0.15565796210957503, "grad_norm": 0.32735319131373286, "learning_rate": 2.5881683731513083e-05, "loss": 0.3839, "num_tokens": 86796362.0, "step": 456 }, { "epoch": 0.15599931728963987, "grad_norm": 0.3217193005818343, "learning_rate": 2.5938566552901024e-05, "loss": 0.3817, "num_tokens": 86960745.0, "step": 457 }, { "epoch": 0.15634067246970473, "grad_norm": 0.35090782256107317, "learning_rate": 2.5995449374288964e-05, "loss": 0.3563, "num_tokens": 87144648.0, "step": 458 }, { "epoch": 0.15668202764976957, "grad_norm": 0.2769440154671512, "learning_rate": 2.605233219567691e-05, "loss": 0.3579, "num_tokens": 87373039.0, "step": 459 }, { "epoch": 0.15702338282983444, "grad_norm": 0.30589072096445785, "learning_rate": 2.6109215017064846e-05, "loss": 0.3888, "num_tokens": 87587075.0, "step": 460 }, { "epoch": 0.1573647380098993, "grad_norm": 0.29415648359626967, "learning_rate": 2.616609783845279e-05, "loss": 0.3581, "num_tokens": 87787490.0, "step": 461 }, { "epoch": 0.15770609318996415, "grad_norm": 0.32143531406828574, "learning_rate": 2.6222980659840727e-05, "loss": 0.3722, "num_tokens": 87939307.0, "step": 462 }, { "epoch": 0.15804744837002901, "grad_norm": 0.3058031451058145, "learning_rate": 2.627986348122867e-05, "loss": 0.3729, "num_tokens": 88117643.0, "step": 463 }, { "epoch": 0.15838880355009388, "grad_norm": 0.32293256428389316, "learning_rate": 2.6336746302616612e-05, "loss": 0.3653, "num_tokens": 88318641.0, "step": 464 }, { "epoch": 0.15873015873015872, "grad_norm": 0.2766639406817887, "learning_rate": 2.6393629124004553e-05, "loss": 0.3575, "num_tokens": 88531281.0, "step": 465 }, { "epoch": 0.1590715139102236, "grad_norm": 0.2889835505154607, "learning_rate": 2.6450511945392494e-05, "loss": 0.3759, "num_tokens": 88744048.0, "step": 466 }, { "epoch": 0.15941286909028846, "grad_norm": 0.31380389204759523, "learning_rate": 2.6507394766780435e-05, "loss": 0.341, "num_tokens": 88922135.0, "step": 467 }, { "epoch": 0.1597542242703533, "grad_norm": 0.29885087656972076, "learning_rate": 2.6564277588168375e-05, "loss": 0.3484, "num_tokens": 89116915.0, "step": 468 }, { "epoch": 0.16009557945041816, "grad_norm": 0.3013463003750259, "learning_rate": 2.6621160409556316e-05, "loss": 0.3578, "num_tokens": 89283807.0, "step": 469 }, { "epoch": 0.16043693463048303, "grad_norm": 0.3310588480586975, "learning_rate": 2.6678043230944257e-05, "loss": 0.4002, "num_tokens": 89469474.0, "step": 470 }, { "epoch": 0.16077828981054787, "grad_norm": 0.2885149525405316, "learning_rate": 2.6734926052332198e-05, "loss": 0.3662, "num_tokens": 89684973.0, "step": 471 }, { "epoch": 0.16111964499061274, "grad_norm": 0.31537785672375235, "learning_rate": 2.6791808873720138e-05, "loss": 0.3753, "num_tokens": 89863666.0, "step": 472 }, { "epoch": 0.16146100017067758, "grad_norm": 0.3157114061282289, "learning_rate": 2.684869169510808e-05, "loss": 0.357, "num_tokens": 90057847.0, "step": 473 }, { "epoch": 0.16180235535074244, "grad_norm": 0.322021771105511, "learning_rate": 2.690557451649602e-05, "loss": 0.3565, "num_tokens": 90269396.0, "step": 474 }, { "epoch": 0.1621437105308073, "grad_norm": 0.2868518418431635, "learning_rate": 2.696245733788396e-05, "loss": 0.3403, "num_tokens": 90467295.0, "step": 475 }, { "epoch": 0.16248506571087215, "grad_norm": 0.30777703082651053, "learning_rate": 2.70193401592719e-05, "loss": 0.3464, "num_tokens": 90626483.0, "step": 476 }, { "epoch": 0.16282642089093702, "grad_norm": 0.33809422059544886, "learning_rate": 2.7076222980659842e-05, "loss": 0.3876, "num_tokens": 90826100.0, "step": 477 }, { "epoch": 0.16316777607100189, "grad_norm": 0.310804251564827, "learning_rate": 2.7133105802047783e-05, "loss": 0.3682, "num_tokens": 91038073.0, "step": 478 }, { "epoch": 0.16350913125106672, "grad_norm": 0.31258926344567944, "learning_rate": 2.7189988623435724e-05, "loss": 0.378, "num_tokens": 91251187.0, "step": 479 }, { "epoch": 0.1638504864311316, "grad_norm": 0.29343004793784644, "learning_rate": 2.7246871444823664e-05, "loss": 0.3542, "num_tokens": 91426152.0, "step": 480 }, { "epoch": 0.16419184161119646, "grad_norm": 0.28104164528477665, "learning_rate": 2.7303754266211605e-05, "loss": 0.3366, "num_tokens": 91623214.0, "step": 481 }, { "epoch": 0.1645331967912613, "grad_norm": 0.37428833982150744, "learning_rate": 2.7360637087599546e-05, "loss": 0.3782, "num_tokens": 91824878.0, "step": 482 }, { "epoch": 0.16487455197132617, "grad_norm": 0.3133440450403231, "learning_rate": 2.7417519908987487e-05, "loss": 0.3776, "num_tokens": 91979957.0, "step": 483 }, { "epoch": 0.16521590715139103, "grad_norm": 0.29773520344511445, "learning_rate": 2.7474402730375427e-05, "loss": 0.3543, "num_tokens": 92168368.0, "step": 484 }, { "epoch": 0.16555726233145587, "grad_norm": 0.3320084445215215, "learning_rate": 2.7531285551763368e-05, "loss": 0.3849, "num_tokens": 92394173.0, "step": 485 }, { "epoch": 0.16589861751152074, "grad_norm": 0.2978560325886573, "learning_rate": 2.758816837315131e-05, "loss": 0.3798, "num_tokens": 92631901.0, "step": 486 }, { "epoch": 0.1662399726915856, "grad_norm": 0.3281123646108139, "learning_rate": 2.764505119453925e-05, "loss": 0.3535, "num_tokens": 92821626.0, "step": 487 }, { "epoch": 0.16658132787165045, "grad_norm": 0.32857771909606975, "learning_rate": 2.770193401592719e-05, "loss": 0.3714, "num_tokens": 93022616.0, "step": 488 }, { "epoch": 0.16692268305171531, "grad_norm": 0.29898343159859425, "learning_rate": 2.775881683731513e-05, "loss": 0.3487, "num_tokens": 93187395.0, "step": 489 }, { "epoch": 0.16726403823178015, "grad_norm": 0.2725208172126251, "learning_rate": 2.7815699658703072e-05, "loss": 0.3581, "num_tokens": 93385433.0, "step": 490 }, { "epoch": 0.16760539341184502, "grad_norm": 0.31612570425570946, "learning_rate": 2.7872582480091013e-05, "loss": 0.3654, "num_tokens": 93596099.0, "step": 491 }, { "epoch": 0.1679467485919099, "grad_norm": 0.38657224984349214, "learning_rate": 2.7929465301478953e-05, "loss": 0.3513, "num_tokens": 93780687.0, "step": 492 }, { "epoch": 0.16828810377197473, "grad_norm": 0.3096129524145491, "learning_rate": 2.7986348122866894e-05, "loss": 0.3463, "num_tokens": 93970669.0, "step": 493 }, { "epoch": 0.1686294589520396, "grad_norm": 0.31974804224024256, "learning_rate": 2.8043230944254838e-05, "loss": 0.4116, "num_tokens": 94168320.0, "step": 494 }, { "epoch": 0.16897081413210446, "grad_norm": 0.36208068217124223, "learning_rate": 2.8100113765642776e-05, "loss": 0.4307, "num_tokens": 94368146.0, "step": 495 }, { "epoch": 0.1693121693121693, "grad_norm": 0.30380409328761415, "learning_rate": 2.815699658703072e-05, "loss": 0.3569, "num_tokens": 94536513.0, "step": 496 }, { "epoch": 0.16965352449223417, "grad_norm": 0.2963219228378777, "learning_rate": 2.8213879408418657e-05, "loss": 0.3435, "num_tokens": 94741190.0, "step": 497 }, { "epoch": 0.16999487967229904, "grad_norm": 0.3100542089115722, "learning_rate": 2.82707622298066e-05, "loss": 0.3841, "num_tokens": 94946544.0, "step": 498 }, { "epoch": 0.17033623485236388, "grad_norm": 0.319617037696925, "learning_rate": 2.832764505119454e-05, "loss": 0.3644, "num_tokens": 95114759.0, "step": 499 }, { "epoch": 0.17067759003242874, "grad_norm": 0.3048986941184723, "learning_rate": 2.8384527872582483e-05, "loss": 0.402, "num_tokens": 95331824.0, "step": 500 }, { "epoch": 0.1710189452124936, "grad_norm": 0.290112876717829, "learning_rate": 2.844141069397042e-05, "loss": 0.3465, "num_tokens": 95494126.0, "step": 501 }, { "epoch": 0.17136030039255845, "grad_norm": 0.30095588207463214, "learning_rate": 2.8498293515358364e-05, "loss": 0.3669, "num_tokens": 95671994.0, "step": 502 }, { "epoch": 0.17170165557262332, "grad_norm": 0.30828737699886166, "learning_rate": 2.85551763367463e-05, "loss": 0.3672, "num_tokens": 95836644.0, "step": 503 }, { "epoch": 0.17204301075268819, "grad_norm": 0.31879022081927844, "learning_rate": 2.8612059158134246e-05, "loss": 0.3745, "num_tokens": 96013465.0, "step": 504 }, { "epoch": 0.17238436593275303, "grad_norm": 0.3451202552870492, "learning_rate": 2.8668941979522183e-05, "loss": 0.4211, "num_tokens": 96208296.0, "step": 505 }, { "epoch": 0.1727257211128179, "grad_norm": 0.3042574035703363, "learning_rate": 2.8725824800910127e-05, "loss": 0.3597, "num_tokens": 96396968.0, "step": 506 }, { "epoch": 0.17306707629288273, "grad_norm": 0.3191610036240085, "learning_rate": 2.8782707622298065e-05, "loss": 0.388, "num_tokens": 96576268.0, "step": 507 }, { "epoch": 0.1734084314729476, "grad_norm": 0.32458551362580496, "learning_rate": 2.883959044368601e-05, "loss": 0.4056, "num_tokens": 96777065.0, "step": 508 }, { "epoch": 0.17374978665301247, "grad_norm": 0.28949886690354504, "learning_rate": 2.889647326507395e-05, "loss": 0.3607, "num_tokens": 96997576.0, "step": 509 }, { "epoch": 0.1740911418330773, "grad_norm": 0.2988873342639659, "learning_rate": 2.895335608646189e-05, "loss": 0.3439, "num_tokens": 97193859.0, "step": 510 }, { "epoch": 0.17443249701314217, "grad_norm": 0.2779235115251092, "learning_rate": 2.901023890784983e-05, "loss": 0.3664, "num_tokens": 97434332.0, "step": 511 }, { "epoch": 0.17477385219320704, "grad_norm": 0.30795976977522044, "learning_rate": 2.9067121729237772e-05, "loss": 0.3742, "num_tokens": 97649876.0, "step": 512 }, { "epoch": 0.17511520737327188, "grad_norm": 0.31878006671642656, "learning_rate": 2.9124004550625713e-05, "loss": 0.4148, "num_tokens": 97850324.0, "step": 513 }, { "epoch": 0.17545656255333675, "grad_norm": 0.27249832349648073, "learning_rate": 2.9180887372013653e-05, "loss": 0.3545, "num_tokens": 98062724.0, "step": 514 }, { "epoch": 0.17579791773340162, "grad_norm": 0.32189277596455607, "learning_rate": 2.9237770193401594e-05, "loss": 0.3812, "num_tokens": 98253151.0, "step": 515 }, { "epoch": 0.17613927291346645, "grad_norm": 0.31697908333445496, "learning_rate": 2.9294653014789535e-05, "loss": 0.3677, "num_tokens": 98426023.0, "step": 516 }, { "epoch": 0.17648062809353132, "grad_norm": 0.288931758262416, "learning_rate": 2.9351535836177476e-05, "loss": 0.3295, "num_tokens": 98630182.0, "step": 517 }, { "epoch": 0.1768219832735962, "grad_norm": 0.27994557069888454, "learning_rate": 2.9408418657565416e-05, "loss": 0.3471, "num_tokens": 98843202.0, "step": 518 }, { "epoch": 0.17716333845366103, "grad_norm": 0.27975203688183853, "learning_rate": 2.9465301478953357e-05, "loss": 0.3742, "num_tokens": 99090150.0, "step": 519 }, { "epoch": 0.1775046936337259, "grad_norm": 0.40358911037800244, "learning_rate": 2.9522184300341298e-05, "loss": 0.3837, "num_tokens": 99280157.0, "step": 520 }, { "epoch": 0.17784604881379074, "grad_norm": 0.3127313782312878, "learning_rate": 2.957906712172924e-05, "loss": 0.4126, "num_tokens": 99489570.0, "step": 521 }, { "epoch": 0.1781874039938556, "grad_norm": 0.32659352831576893, "learning_rate": 2.963594994311718e-05, "loss": 0.3651, "num_tokens": 99672168.0, "step": 522 }, { "epoch": 0.17852875917392047, "grad_norm": 0.30171778351552814, "learning_rate": 2.969283276450512e-05, "loss": 0.3553, "num_tokens": 99883401.0, "step": 523 }, { "epoch": 0.1788701143539853, "grad_norm": 0.3046795996625931, "learning_rate": 2.974971558589306e-05, "loss": 0.366, "num_tokens": 100068265.0, "step": 524 }, { "epoch": 0.17921146953405018, "grad_norm": 0.2913241371278296, "learning_rate": 2.9806598407281e-05, "loss": 0.3852, "num_tokens": 100298894.0, "step": 525 }, { "epoch": 0.17955282471411504, "grad_norm": 0.28262269013413505, "learning_rate": 2.9863481228668942e-05, "loss": 0.3751, "num_tokens": 100513308.0, "step": 526 }, { "epoch": 0.17989417989417988, "grad_norm": 0.2872383964289461, "learning_rate": 2.9920364050056883e-05, "loss": 0.3554, "num_tokens": 100716819.0, "step": 527 }, { "epoch": 0.18023553507424475, "grad_norm": 0.3038941202946037, "learning_rate": 2.9977246871444824e-05, "loss": 0.3467, "num_tokens": 100866712.0, "step": 528 }, { "epoch": 0.18057689025430962, "grad_norm": 0.2861998880928794, "learning_rate": 3.0034129692832768e-05, "loss": 0.356, "num_tokens": 101050925.0, "step": 529 }, { "epoch": 0.18091824543437446, "grad_norm": 0.30452395814877586, "learning_rate": 3.0091012514220705e-05, "loss": 0.3737, "num_tokens": 101252006.0, "step": 530 }, { "epoch": 0.18125960061443933, "grad_norm": 0.4216977700460161, "learning_rate": 3.014789533560865e-05, "loss": 0.3877, "num_tokens": 101446246.0, "step": 531 }, { "epoch": 0.1816009557945042, "grad_norm": 0.30530717460570705, "learning_rate": 3.0204778156996587e-05, "loss": 0.3708, "num_tokens": 101644370.0, "step": 532 }, { "epoch": 0.18194231097456903, "grad_norm": 0.2888534750233283, "learning_rate": 3.026166097838453e-05, "loss": 0.3536, "num_tokens": 101837824.0, "step": 533 }, { "epoch": 0.1822836661546339, "grad_norm": 0.31310521642942435, "learning_rate": 3.0318543799772468e-05, "loss": 0.3786, "num_tokens": 102020392.0, "step": 534 }, { "epoch": 0.18262502133469877, "grad_norm": 0.3509610173475715, "learning_rate": 3.0375426621160412e-05, "loss": 0.3865, "num_tokens": 102205965.0, "step": 535 }, { "epoch": 0.1829663765147636, "grad_norm": 0.30027570901760775, "learning_rate": 3.043230944254835e-05, "loss": 0.3562, "num_tokens": 102393045.0, "step": 536 }, { "epoch": 0.18330773169482847, "grad_norm": 0.28425505329493295, "learning_rate": 3.0489192263936294e-05, "loss": 0.3323, "num_tokens": 102582244.0, "step": 537 }, { "epoch": 0.1836490868748933, "grad_norm": 0.2929236934379806, "learning_rate": 3.054607508532423e-05, "loss": 0.3604, "num_tokens": 102774589.0, "step": 538 }, { "epoch": 0.18399044205495818, "grad_norm": 0.3214509719747809, "learning_rate": 3.060295790671218e-05, "loss": 0.3622, "num_tokens": 102954887.0, "step": 539 }, { "epoch": 0.18433179723502305, "grad_norm": 0.34849933441551023, "learning_rate": 3.065984072810011e-05, "loss": 0.4056, "num_tokens": 103111486.0, "step": 540 }, { "epoch": 0.1846731524150879, "grad_norm": 0.32762585184137893, "learning_rate": 3.071672354948806e-05, "loss": 0.3656, "num_tokens": 103308332.0, "step": 541 }, { "epoch": 0.18501450759515276, "grad_norm": 0.3772245489500054, "learning_rate": 3.0773606370875994e-05, "loss": 0.4338, "num_tokens": 103472424.0, "step": 542 }, { "epoch": 0.18535586277521762, "grad_norm": 0.3272311996599788, "learning_rate": 3.083048919226394e-05, "loss": 0.3698, "num_tokens": 103694875.0, "step": 543 }, { "epoch": 0.18569721795528246, "grad_norm": 0.33046196983945975, "learning_rate": 3.0887372013651876e-05, "loss": 0.3611, "num_tokens": 103863565.0, "step": 544 }, { "epoch": 0.18603857313534733, "grad_norm": 0.38948958314936527, "learning_rate": 3.094425483503982e-05, "loss": 0.3848, "num_tokens": 104032677.0, "step": 545 }, { "epoch": 0.1863799283154122, "grad_norm": 0.33420745096210325, "learning_rate": 3.100113765642776e-05, "loss": 0.3772, "num_tokens": 104204175.0, "step": 546 }, { "epoch": 0.18672128349547704, "grad_norm": 0.4606620254468934, "learning_rate": 3.1058020477815705e-05, "loss": 0.3719, "num_tokens": 104388103.0, "step": 547 }, { "epoch": 0.1870626386755419, "grad_norm": 0.33172212640796306, "learning_rate": 3.111490329920364e-05, "loss": 0.3897, "num_tokens": 104563124.0, "step": 548 }, { "epoch": 0.18740399385560677, "grad_norm": 0.3003108343182674, "learning_rate": 3.1171786120591586e-05, "loss": 0.347, "num_tokens": 104743638.0, "step": 549 }, { "epoch": 0.1877453490356716, "grad_norm": 0.30126416920854904, "learning_rate": 3.122866894197952e-05, "loss": 0.3741, "num_tokens": 104928238.0, "step": 550 }, { "epoch": 0.18808670421573648, "grad_norm": 0.34603224111131486, "learning_rate": 3.128555176336747e-05, "loss": 0.4098, "num_tokens": 105109159.0, "step": 551 }, { "epoch": 0.18842805939580134, "grad_norm": 0.3302433942340944, "learning_rate": 3.13424345847554e-05, "loss": 0.3801, "num_tokens": 105282191.0, "step": 552 }, { "epoch": 0.18876941457586618, "grad_norm": 0.2964215176853014, "learning_rate": 3.139931740614335e-05, "loss": 0.3637, "num_tokens": 105452081.0, "step": 553 }, { "epoch": 0.18911076975593105, "grad_norm": 0.33474819667008743, "learning_rate": 3.145620022753128e-05, "loss": 0.3727, "num_tokens": 105621001.0, "step": 554 }, { "epoch": 0.1894521249359959, "grad_norm": 0.33925320419294686, "learning_rate": 3.151308304891923e-05, "loss": 0.3675, "num_tokens": 105787864.0, "step": 555 }, { "epoch": 0.18979348011606076, "grad_norm": 0.3161153760816839, "learning_rate": 3.1569965870307165e-05, "loss": 0.3797, "num_tokens": 105977429.0, "step": 556 }, { "epoch": 0.19013483529612563, "grad_norm": 0.3077534239409492, "learning_rate": 3.162684869169511e-05, "loss": 0.3963, "num_tokens": 106168985.0, "step": 557 }, { "epoch": 0.19047619047619047, "grad_norm": 0.25783079100746475, "learning_rate": 3.1683731513083046e-05, "loss": 0.3501, "num_tokens": 106379895.0, "step": 558 }, { "epoch": 0.19081754565625533, "grad_norm": 0.32848226277055315, "learning_rate": 3.1740614334470994e-05, "loss": 0.4422, "num_tokens": 106602290.0, "step": 559 }, { "epoch": 0.1911589008363202, "grad_norm": 0.3366858987105297, "learning_rate": 3.179749715585893e-05, "loss": 0.3813, "num_tokens": 106783349.0, "step": 560 }, { "epoch": 0.19150025601638504, "grad_norm": 0.2673469461634933, "learning_rate": 3.1854379977246875e-05, "loss": 0.3459, "num_tokens": 106993312.0, "step": 561 }, { "epoch": 0.1918416111964499, "grad_norm": 0.29834646965341605, "learning_rate": 3.191126279863481e-05, "loss": 0.3243, "num_tokens": 107160309.0, "step": 562 }, { "epoch": 0.19218296637651477, "grad_norm": 0.3152615048807994, "learning_rate": 3.196814562002276e-05, "loss": 0.3616, "num_tokens": 107334331.0, "step": 563 }, { "epoch": 0.19252432155657961, "grad_norm": 0.33053577523155875, "learning_rate": 3.20250284414107e-05, "loss": 0.3528, "num_tokens": 107499465.0, "step": 564 }, { "epoch": 0.19286567673664448, "grad_norm": 0.2803228680774715, "learning_rate": 3.208191126279864e-05, "loss": 0.3611, "num_tokens": 107708430.0, "step": 565 }, { "epoch": 0.19320703191670935, "grad_norm": 0.3208316167451245, "learning_rate": 3.213879408418658e-05, "loss": 0.3624, "num_tokens": 107894234.0, "step": 566 }, { "epoch": 0.1935483870967742, "grad_norm": 0.3252866530541261, "learning_rate": 3.219567690557452e-05, "loss": 0.3814, "num_tokens": 108091551.0, "step": 567 }, { "epoch": 0.19388974227683906, "grad_norm": 0.3199469980234094, "learning_rate": 3.225255972696246e-05, "loss": 0.392, "num_tokens": 108276965.0, "step": 568 }, { "epoch": 0.1942310974569039, "grad_norm": 0.30382721771451354, "learning_rate": 3.23094425483504e-05, "loss": 0.3542, "num_tokens": 108483577.0, "step": 569 }, { "epoch": 0.19457245263696876, "grad_norm": 0.317036214910374, "learning_rate": 3.236632536973834e-05, "loss": 0.3485, "num_tokens": 108676833.0, "step": 570 }, { "epoch": 0.19491380781703363, "grad_norm": 0.3348939280258051, "learning_rate": 3.242320819112628e-05, "loss": 0.4088, "num_tokens": 108920633.0, "step": 571 }, { "epoch": 0.19525516299709847, "grad_norm": 0.31071527374072094, "learning_rate": 3.2480091012514224e-05, "loss": 0.3708, "num_tokens": 109113575.0, "step": 572 }, { "epoch": 0.19559651817716334, "grad_norm": 0.28444381306158734, "learning_rate": 3.2536973833902164e-05, "loss": 0.3402, "num_tokens": 109304605.0, "step": 573 }, { "epoch": 0.1959378733572282, "grad_norm": 0.35870580358336474, "learning_rate": 3.2593856655290105e-05, "loss": 0.3855, "num_tokens": 109516128.0, "step": 574 }, { "epoch": 0.19627922853729304, "grad_norm": 0.3050164702300536, "learning_rate": 3.2650739476678046e-05, "loss": 0.3537, "num_tokens": 109703089.0, "step": 575 }, { "epoch": 0.1966205837173579, "grad_norm": 0.2951500682123381, "learning_rate": 3.270762229806599e-05, "loss": 0.3612, "num_tokens": 109899261.0, "step": 576 }, { "epoch": 0.19696193889742278, "grad_norm": 0.32019709454729756, "learning_rate": 3.276450511945393e-05, "loss": 0.3762, "num_tokens": 110100672.0, "step": 577 }, { "epoch": 0.19730329407748762, "grad_norm": 0.30004785383802285, "learning_rate": 3.282138794084187e-05, "loss": 0.3785, "num_tokens": 110281710.0, "step": 578 }, { "epoch": 0.19764464925755248, "grad_norm": 0.3027985261843148, "learning_rate": 3.287827076222981e-05, "loss": 0.3616, "num_tokens": 110508637.0, "step": 579 }, { "epoch": 0.19798600443761735, "grad_norm": 0.305418788304247, "learning_rate": 3.293515358361775e-05, "loss": 0.3544, "num_tokens": 110717160.0, "step": 580 }, { "epoch": 0.1983273596176822, "grad_norm": 0.3224667181763949, "learning_rate": 3.299203640500569e-05, "loss": 0.3605, "num_tokens": 110883441.0, "step": 581 }, { "epoch": 0.19866871479774706, "grad_norm": 0.3246795022325737, "learning_rate": 3.304891922639363e-05, "loss": 0.3517, "num_tokens": 111047996.0, "step": 582 }, { "epoch": 0.19901006997781193, "grad_norm": 0.325240051929126, "learning_rate": 3.310580204778157e-05, "loss": 0.3711, "num_tokens": 111227039.0, "step": 583 }, { "epoch": 0.19935142515787677, "grad_norm": 0.32226388009859364, "learning_rate": 3.316268486916951e-05, "loss": 0.3951, "num_tokens": 111439459.0, "step": 584 }, { "epoch": 0.19969278033794163, "grad_norm": 0.31228335827397086, "learning_rate": 3.3219567690557453e-05, "loss": 0.3979, "num_tokens": 111642988.0, "step": 585 }, { "epoch": 0.20003413551800647, "grad_norm": 0.27428031469654474, "learning_rate": 3.3276450511945394e-05, "loss": 0.3422, "num_tokens": 111844091.0, "step": 586 }, { "epoch": 0.20037549069807134, "grad_norm": 0.3171995213823561, "learning_rate": 3.3333333333333335e-05, "loss": 0.3604, "num_tokens": 111999063.0, "step": 587 }, { "epoch": 0.2007168458781362, "grad_norm": 0.34819171592448817, "learning_rate": 3.3390216154721276e-05, "loss": 0.3974, "num_tokens": 112180111.0, "step": 588 }, { "epoch": 0.20105820105820105, "grad_norm": 0.2918746667098429, "learning_rate": 3.3447098976109216e-05, "loss": 0.3593, "num_tokens": 112375479.0, "step": 589 }, { "epoch": 0.20139955623826591, "grad_norm": 0.29099075380534384, "learning_rate": 3.350398179749716e-05, "loss": 0.3992, "num_tokens": 112582697.0, "step": 590 }, { "epoch": 0.20174091141833078, "grad_norm": 0.36903912731235555, "learning_rate": 3.35608646188851e-05, "loss": 0.3969, "num_tokens": 112788362.0, "step": 591 }, { "epoch": 0.20208226659839562, "grad_norm": 0.33278610942158793, "learning_rate": 3.361774744027304e-05, "loss": 0.3936, "num_tokens": 112953996.0, "step": 592 }, { "epoch": 0.2024236217784605, "grad_norm": 0.3185836186459437, "learning_rate": 3.367463026166098e-05, "loss": 0.382, "num_tokens": 113153212.0, "step": 593 }, { "epoch": 0.20276497695852536, "grad_norm": 0.29412200565560986, "learning_rate": 3.373151308304892e-05, "loss": 0.3445, "num_tokens": 113316207.0, "step": 594 }, { "epoch": 0.2031063321385902, "grad_norm": 0.2805365934425656, "learning_rate": 3.378839590443686e-05, "loss": 0.3449, "num_tokens": 113497324.0, "step": 595 }, { "epoch": 0.20344768731865506, "grad_norm": 0.3236097864698825, "learning_rate": 3.38452787258248e-05, "loss": 0.3899, "num_tokens": 113701034.0, "step": 596 }, { "epoch": 0.20378904249871993, "grad_norm": 0.2899197035319922, "learning_rate": 3.390216154721274e-05, "loss": 0.3616, "num_tokens": 113885137.0, "step": 597 }, { "epoch": 0.20413039767878477, "grad_norm": 0.2792631600430386, "learning_rate": 3.395904436860068e-05, "loss": 0.371, "num_tokens": 114068266.0, "step": 598 }, { "epoch": 0.20447175285884964, "grad_norm": 0.3280754812711094, "learning_rate": 3.401592718998863e-05, "loss": 0.3927, "num_tokens": 114221469.0, "step": 599 }, { "epoch": 0.2048131080389145, "grad_norm": 0.30079409228321713, "learning_rate": 3.4072810011376565e-05, "loss": 0.3731, "num_tokens": 114384843.0, "step": 600 }, { "epoch": 0.20515446321897934, "grad_norm": 0.28761176122048665, "learning_rate": 3.412969283276451e-05, "loss": 0.3816, "num_tokens": 114588988.0, "step": 601 }, { "epoch": 0.2054958183990442, "grad_norm": 0.31627780059691146, "learning_rate": 3.4186575654152446e-05, "loss": 0.3965, "num_tokens": 114798216.0, "step": 602 }, { "epoch": 0.20583717357910905, "grad_norm": 0.2829068250576115, "learning_rate": 3.4243458475540394e-05, "loss": 0.3573, "num_tokens": 114972526.0, "step": 603 }, { "epoch": 0.20617852875917392, "grad_norm": 0.3144366556467768, "learning_rate": 3.430034129692833e-05, "loss": 0.3922, "num_tokens": 115144356.0, "step": 604 }, { "epoch": 0.20651988393923879, "grad_norm": 0.29198309047765253, "learning_rate": 3.4357224118316275e-05, "loss": 0.3606, "num_tokens": 115322080.0, "step": 605 }, { "epoch": 0.20686123911930362, "grad_norm": 0.2956811432374924, "learning_rate": 3.441410693970421e-05, "loss": 0.3442, "num_tokens": 115497586.0, "step": 606 }, { "epoch": 0.2072025942993685, "grad_norm": 0.2700959768333322, "learning_rate": 3.447098976109216e-05, "loss": 0.3485, "num_tokens": 115701466.0, "step": 607 }, { "epoch": 0.20754394947943336, "grad_norm": 0.3064312022423273, "learning_rate": 3.452787258248009e-05, "loss": 0.3731, "num_tokens": 115905618.0, "step": 608 }, { "epoch": 0.2078853046594982, "grad_norm": 0.30274247470420607, "learning_rate": 3.458475540386804e-05, "loss": 0.3856, "num_tokens": 116097272.0, "step": 609 }, { "epoch": 0.20822665983956307, "grad_norm": 0.25819857208115854, "learning_rate": 3.464163822525597e-05, "loss": 0.3367, "num_tokens": 116303520.0, "step": 610 }, { "epoch": 0.20856801501962793, "grad_norm": 0.36231670535983795, "learning_rate": 3.469852104664392e-05, "loss": 0.398, "num_tokens": 116493370.0, "step": 611 }, { "epoch": 0.20890937019969277, "grad_norm": 0.29342730947876855, "learning_rate": 3.4755403868031854e-05, "loss": 0.3403, "num_tokens": 116679811.0, "step": 612 }, { "epoch": 0.20925072537975764, "grad_norm": 0.31520370896600824, "learning_rate": 3.48122866894198e-05, "loss": 0.3619, "num_tokens": 116835117.0, "step": 613 }, { "epoch": 0.2095920805598225, "grad_norm": 0.29757544574981504, "learning_rate": 3.4869169510807735e-05, "loss": 0.3778, "num_tokens": 117030919.0, "step": 614 }, { "epoch": 0.20993343573988735, "grad_norm": 0.28571397383140146, "learning_rate": 3.492605233219568e-05, "loss": 0.3397, "num_tokens": 117252950.0, "step": 615 }, { "epoch": 0.21027479091995221, "grad_norm": 0.2915617588721739, "learning_rate": 3.498293515358362e-05, "loss": 0.3961, "num_tokens": 117452828.0, "step": 616 }, { "epoch": 0.21061614610001705, "grad_norm": 0.2476344980107825, "learning_rate": 3.5039817974971564e-05, "loss": 0.3483, "num_tokens": 117668673.0, "step": 617 }, { "epoch": 0.21095750128008192, "grad_norm": 0.29817390316446013, "learning_rate": 3.50967007963595e-05, "loss": 0.3753, "num_tokens": 117930995.0, "step": 618 }, { "epoch": 0.2112988564601468, "grad_norm": 0.30039454189497333, "learning_rate": 3.5153583617747446e-05, "loss": 0.3301, "num_tokens": 118112522.0, "step": 619 }, { "epoch": 0.21164021164021163, "grad_norm": 0.3584658738803308, "learning_rate": 3.521046643913538e-05, "loss": 0.3958, "num_tokens": 118304694.0, "step": 620 }, { "epoch": 0.2119815668202765, "grad_norm": 0.2763179591799095, "learning_rate": 3.526734926052333e-05, "loss": 0.3513, "num_tokens": 118498494.0, "step": 621 }, { "epoch": 0.21232292200034136, "grad_norm": 0.35039271353087365, "learning_rate": 3.532423208191126e-05, "loss": 0.4053, "num_tokens": 118714973.0, "step": 622 }, { "epoch": 0.2126642771804062, "grad_norm": 0.33744503716106755, "learning_rate": 3.538111490329921e-05, "loss": 0.4303, "num_tokens": 118896816.0, "step": 623 }, { "epoch": 0.21300563236047107, "grad_norm": 0.29766495297827145, "learning_rate": 3.543799772468714e-05, "loss": 0.3396, "num_tokens": 119051749.0, "step": 624 }, { "epoch": 0.21334698754053594, "grad_norm": 0.33509153310821815, "learning_rate": 3.549488054607509e-05, "loss": 0.3678, "num_tokens": 119207621.0, "step": 625 }, { "epoch": 0.21368834272060078, "grad_norm": 0.29916936680026834, "learning_rate": 3.5551763367463024e-05, "loss": 0.3589, "num_tokens": 119368628.0, "step": 626 }, { "epoch": 0.21402969790066564, "grad_norm": 0.30433770777631647, "learning_rate": 3.560864618885097e-05, "loss": 0.3709, "num_tokens": 119566830.0, "step": 627 }, { "epoch": 0.2143710530807305, "grad_norm": 0.2805826773851775, "learning_rate": 3.5665529010238906e-05, "loss": 0.3353, "num_tokens": 119754887.0, "step": 628 }, { "epoch": 0.21471240826079535, "grad_norm": 0.2928885963703463, "learning_rate": 3.572241183162685e-05, "loss": 0.399, "num_tokens": 119970592.0, "step": 629 }, { "epoch": 0.21505376344086022, "grad_norm": 0.3060725127570512, "learning_rate": 3.577929465301479e-05, "loss": 0.3972, "num_tokens": 120161170.0, "step": 630 }, { "epoch": 0.21539511862092509, "grad_norm": 0.2766777194777378, "learning_rate": 3.5836177474402735e-05, "loss": 0.4014, "num_tokens": 120390942.0, "step": 631 }, { "epoch": 0.21573647380098993, "grad_norm": 0.2936009504037341, "learning_rate": 3.589306029579067e-05, "loss": 0.3772, "num_tokens": 120582253.0, "step": 632 }, { "epoch": 0.2160778289810548, "grad_norm": 0.2929378443709107, "learning_rate": 3.5949943117178616e-05, "loss": 0.3864, "num_tokens": 120780122.0, "step": 633 }, { "epoch": 0.21641918416111963, "grad_norm": 0.29200641353109624, "learning_rate": 3.600682593856656e-05, "loss": 0.3734, "num_tokens": 120951441.0, "step": 634 }, { "epoch": 0.2167605393411845, "grad_norm": 0.262756295607731, "learning_rate": 3.60637087599545e-05, "loss": 0.3714, "num_tokens": 121174033.0, "step": 635 }, { "epoch": 0.21710189452124937, "grad_norm": 0.25669638409476486, "learning_rate": 3.612059158134244e-05, "loss": 0.3743, "num_tokens": 121414368.0, "step": 636 }, { "epoch": 0.2174432497013142, "grad_norm": 0.2722667948243591, "learning_rate": 3.617747440273038e-05, "loss": 0.3535, "num_tokens": 121616153.0, "step": 637 }, { "epoch": 0.21778460488137907, "grad_norm": 0.3111684574585384, "learning_rate": 3.623435722411832e-05, "loss": 0.3574, "num_tokens": 121780927.0, "step": 638 }, { "epoch": 0.21812596006144394, "grad_norm": 0.30327116063635645, "learning_rate": 3.629124004550626e-05, "loss": 0.3905, "num_tokens": 121978994.0, "step": 639 }, { "epoch": 0.21846731524150878, "grad_norm": 0.2856464786812487, "learning_rate": 3.63481228668942e-05, "loss": 0.3489, "num_tokens": 122172955.0, "step": 640 }, { "epoch": 0.21880867042157365, "grad_norm": 0.2812732935449097, "learning_rate": 3.640500568828214e-05, "loss": 0.3498, "num_tokens": 122355306.0, "step": 641 }, { "epoch": 0.21915002560163852, "grad_norm": 0.3048862625899986, "learning_rate": 3.646188850967008e-05, "loss": 0.4132, "num_tokens": 122564818.0, "step": 642 }, { "epoch": 0.21949138078170335, "grad_norm": 0.27866383947969386, "learning_rate": 3.6518771331058024e-05, "loss": 0.3437, "num_tokens": 122736658.0, "step": 643 }, { "epoch": 0.21983273596176822, "grad_norm": 0.28876170911381666, "learning_rate": 3.6575654152445965e-05, "loss": 0.3743, "num_tokens": 122917979.0, "step": 644 }, { "epoch": 0.2201740911418331, "grad_norm": 0.30004871614882994, "learning_rate": 3.6632536973833905e-05, "loss": 0.3699, "num_tokens": 123114899.0, "step": 645 }, { "epoch": 0.22051544632189793, "grad_norm": 0.30813695467998964, "learning_rate": 3.6689419795221846e-05, "loss": 0.3828, "num_tokens": 123312476.0, "step": 646 }, { "epoch": 0.2208568015019628, "grad_norm": 0.30968313741501785, "learning_rate": 3.674630261660979e-05, "loss": 0.3874, "num_tokens": 123491978.0, "step": 647 }, { "epoch": 0.22119815668202766, "grad_norm": 0.2885337488901703, "learning_rate": 3.680318543799773e-05, "loss": 0.3621, "num_tokens": 123669167.0, "step": 648 }, { "epoch": 0.2215395118620925, "grad_norm": 0.28694155267917365, "learning_rate": 3.686006825938567e-05, "loss": 0.3623, "num_tokens": 123841403.0, "step": 649 }, { "epoch": 0.22188086704215737, "grad_norm": 0.2975620180854488, "learning_rate": 3.691695108077361e-05, "loss": 0.4119, "num_tokens": 124098510.0, "step": 650 }, { "epoch": 0.2222222222222222, "grad_norm": 0.290417613410932, "learning_rate": 3.697383390216155e-05, "loss": 0.369, "num_tokens": 124257299.0, "step": 651 }, { "epoch": 0.22256357740228708, "grad_norm": 0.32838206645003204, "learning_rate": 3.703071672354949e-05, "loss": 0.4149, "num_tokens": 124429142.0, "step": 652 }, { "epoch": 0.22290493258235194, "grad_norm": 0.3196899250653201, "learning_rate": 3.708759954493743e-05, "loss": 0.418, "num_tokens": 124624337.0, "step": 653 }, { "epoch": 0.22324628776241678, "grad_norm": 0.2671182973062966, "learning_rate": 3.714448236632537e-05, "loss": 0.3472, "num_tokens": 124831713.0, "step": 654 }, { "epoch": 0.22358764294248165, "grad_norm": 0.28521612598175455, "learning_rate": 3.720136518771331e-05, "loss": 0.3763, "num_tokens": 125013748.0, "step": 655 }, { "epoch": 0.22392899812254652, "grad_norm": 0.2958198767851041, "learning_rate": 3.7258248009101254e-05, "loss": 0.3845, "num_tokens": 125196538.0, "step": 656 }, { "epoch": 0.22427035330261136, "grad_norm": 0.29286210191924505, "learning_rate": 3.7315130830489194e-05, "loss": 0.3571, "num_tokens": 125369081.0, "step": 657 }, { "epoch": 0.22461170848267623, "grad_norm": 0.2778239391729808, "learning_rate": 3.7372013651877135e-05, "loss": 0.3819, "num_tokens": 125556869.0, "step": 658 }, { "epoch": 0.2249530636627411, "grad_norm": 0.2794657620055016, "learning_rate": 3.7428896473265076e-05, "loss": 0.3688, "num_tokens": 125751180.0, "step": 659 }, { "epoch": 0.22529441884280593, "grad_norm": 0.24930400151630558, "learning_rate": 3.7485779294653017e-05, "loss": 0.338, "num_tokens": 125948635.0, "step": 660 }, { "epoch": 0.2256357740228708, "grad_norm": 0.2713515255829585, "learning_rate": 3.754266211604096e-05, "loss": 0.3763, "num_tokens": 126153979.0, "step": 661 }, { "epoch": 0.22597712920293567, "grad_norm": 0.29685089875672993, "learning_rate": 3.75995449374289e-05, "loss": 0.3726, "num_tokens": 126321408.0, "step": 662 }, { "epoch": 0.2263184843830005, "grad_norm": 0.26820529387696684, "learning_rate": 3.765642775881684e-05, "loss": 0.3713, "num_tokens": 126546709.0, "step": 663 }, { "epoch": 0.22665983956306537, "grad_norm": 0.24081543570101532, "learning_rate": 3.771331058020478e-05, "loss": 0.3171, "num_tokens": 126742353.0, "step": 664 }, { "epoch": 0.2270011947431302, "grad_norm": 0.2761548096051374, "learning_rate": 3.777019340159272e-05, "loss": 0.3816, "num_tokens": 126957270.0, "step": 665 }, { "epoch": 0.22734254992319508, "grad_norm": 0.27479006974125086, "learning_rate": 3.782707622298066e-05, "loss": 0.3795, "num_tokens": 127152576.0, "step": 666 }, { "epoch": 0.22768390510325995, "grad_norm": 0.25434593851943976, "learning_rate": 3.78839590443686e-05, "loss": 0.3587, "num_tokens": 127359203.0, "step": 667 }, { "epoch": 0.2280252602833248, "grad_norm": 0.29193322408531575, "learning_rate": 3.794084186575654e-05, "loss": 0.3545, "num_tokens": 127558402.0, "step": 668 }, { "epoch": 0.22836661546338966, "grad_norm": 0.29069596697958644, "learning_rate": 3.799772468714448e-05, "loss": 0.3888, "num_tokens": 127741253.0, "step": 669 }, { "epoch": 0.22870797064345452, "grad_norm": 0.269644396475036, "learning_rate": 3.8054607508532424e-05, "loss": 0.3604, "num_tokens": 127927690.0, "step": 670 }, { "epoch": 0.22904932582351936, "grad_norm": 0.3027325993604869, "learning_rate": 3.811149032992037e-05, "loss": 0.3825, "num_tokens": 128088197.0, "step": 671 }, { "epoch": 0.22939068100358423, "grad_norm": 0.28860057078489404, "learning_rate": 3.8168373151308306e-05, "loss": 0.4023, "num_tokens": 128286417.0, "step": 672 }, { "epoch": 0.2297320361836491, "grad_norm": 0.2663865713019809, "learning_rate": 3.822525597269625e-05, "loss": 0.3508, "num_tokens": 128477666.0, "step": 673 }, { "epoch": 0.23007339136371394, "grad_norm": 0.2929595857435029, "learning_rate": 3.828213879408419e-05, "loss": 0.3802, "num_tokens": 128643289.0, "step": 674 }, { "epoch": 0.2304147465437788, "grad_norm": 0.4475446677124302, "learning_rate": 3.8339021615472135e-05, "loss": 0.3827, "num_tokens": 128904361.0, "step": 675 }, { "epoch": 0.23075610172384367, "grad_norm": 0.2576541448208543, "learning_rate": 3.839590443686007e-05, "loss": 0.3793, "num_tokens": 129131927.0, "step": 676 }, { "epoch": 0.2310974569039085, "grad_norm": 0.3189688595597637, "learning_rate": 3.8452787258248016e-05, "loss": 0.4014, "num_tokens": 129309135.0, "step": 677 }, { "epoch": 0.23143881208397338, "grad_norm": 0.32462920219662944, "learning_rate": 3.850967007963595e-05, "loss": 0.4211, "num_tokens": 129478808.0, "step": 678 }, { "epoch": 0.23178016726403824, "grad_norm": 0.36575492307735935, "learning_rate": 3.85665529010239e-05, "loss": 0.3837, "num_tokens": 129674127.0, "step": 679 }, { "epoch": 0.23212152244410308, "grad_norm": 0.29517605864686297, "learning_rate": 3.862343572241183e-05, "loss": 0.3601, "num_tokens": 129823684.0, "step": 680 }, { "epoch": 0.23246287762416795, "grad_norm": 0.31812059322215863, "learning_rate": 3.868031854379978e-05, "loss": 0.3827, "num_tokens": 130012242.0, "step": 681 }, { "epoch": 0.2328042328042328, "grad_norm": 0.30136187946673815, "learning_rate": 3.873720136518771e-05, "loss": 0.3734, "num_tokens": 130213999.0, "step": 682 }, { "epoch": 0.23314558798429766, "grad_norm": 0.28542283132861246, "learning_rate": 3.879408418657566e-05, "loss": 0.3843, "num_tokens": 130403480.0, "step": 683 }, { "epoch": 0.23348694316436253, "grad_norm": 0.3049727803174313, "learning_rate": 3.8850967007963595e-05, "loss": 0.3497, "num_tokens": 130556056.0, "step": 684 }, { "epoch": 0.23382829834442737, "grad_norm": 0.3123694276088038, "learning_rate": 3.890784982935154e-05, "loss": 0.3761, "num_tokens": 130747035.0, "step": 685 }, { "epoch": 0.23416965352449223, "grad_norm": 0.2982564198958956, "learning_rate": 3.8964732650739476e-05, "loss": 0.3939, "num_tokens": 130926250.0, "step": 686 }, { "epoch": 0.2345110087045571, "grad_norm": 0.322099728402816, "learning_rate": 3.9021615472127424e-05, "loss": 0.3828, "num_tokens": 131095095.0, "step": 687 }, { "epoch": 0.23485236388462194, "grad_norm": 0.2757064042028327, "learning_rate": 3.907849829351536e-05, "loss": 0.3671, "num_tokens": 131290627.0, "step": 688 }, { "epoch": 0.2351937190646868, "grad_norm": 0.29424431022256753, "learning_rate": 3.9135381114903305e-05, "loss": 0.419, "num_tokens": 131486295.0, "step": 689 }, { "epoch": 0.23553507424475167, "grad_norm": 0.32002201228528193, "learning_rate": 3.919226393629124e-05, "loss": 0.3972, "num_tokens": 131704759.0, "step": 690 }, { "epoch": 0.2358764294248165, "grad_norm": 0.29320543156927964, "learning_rate": 3.924914675767919e-05, "loss": 0.4105, "num_tokens": 131892603.0, "step": 691 }, { "epoch": 0.23621778460488138, "grad_norm": 0.2967207355770454, "learning_rate": 3.930602957906712e-05, "loss": 0.3996, "num_tokens": 132084557.0, "step": 692 }, { "epoch": 0.23655913978494625, "grad_norm": 0.2853894115011836, "learning_rate": 3.936291240045507e-05, "loss": 0.3956, "num_tokens": 132348529.0, "step": 693 }, { "epoch": 0.2369004949650111, "grad_norm": 0.27574648622501907, "learning_rate": 3.9419795221843e-05, "loss": 0.324, "num_tokens": 132530569.0, "step": 694 }, { "epoch": 0.23724185014507596, "grad_norm": 0.3344421386888771, "learning_rate": 3.947667804323095e-05, "loss": 0.4052, "num_tokens": 132707681.0, "step": 695 }, { "epoch": 0.23758320532514082, "grad_norm": 0.30316155146541185, "learning_rate": 3.9533560864618884e-05, "loss": 0.383, "num_tokens": 132901948.0, "step": 696 }, { "epoch": 0.23792456050520566, "grad_norm": 0.34896306535705274, "learning_rate": 3.959044368600683e-05, "loss": 0.3665, "num_tokens": 133085888.0, "step": 697 }, { "epoch": 0.23826591568527053, "grad_norm": 0.3058753154064625, "learning_rate": 3.9647326507394765e-05, "loss": 0.388, "num_tokens": 133280039.0, "step": 698 }, { "epoch": 0.23860727086533537, "grad_norm": 0.36811895887959967, "learning_rate": 3.970420932878271e-05, "loss": 0.4161, "num_tokens": 133453208.0, "step": 699 }, { "epoch": 0.23894862604540024, "grad_norm": 0.2942043038725156, "learning_rate": 3.976109215017065e-05, "loss": 0.4128, "num_tokens": 133622057.0, "step": 700 }, { "epoch": 0.2392899812254651, "grad_norm": 0.30054065743901065, "learning_rate": 3.9817974971558594e-05, "loss": 0.3453, "num_tokens": 133779413.0, "step": 701 }, { "epoch": 0.23963133640552994, "grad_norm": 0.31609879557014603, "learning_rate": 3.987485779294653e-05, "loss": 0.3827, "num_tokens": 133944273.0, "step": 702 }, { "epoch": 0.2399726915855948, "grad_norm": 0.28138530256532596, "learning_rate": 3.9931740614334476e-05, "loss": 0.3731, "num_tokens": 134138663.0, "step": 703 }, { "epoch": 0.24031404676565968, "grad_norm": 0.38305941291001344, "learning_rate": 3.998862343572241e-05, "loss": 0.3901, "num_tokens": 134319472.0, "step": 704 }, { "epoch": 0.24065540194572452, "grad_norm": 0.275526923030494, "learning_rate": 4.004550625711036e-05, "loss": 0.3822, "num_tokens": 134496048.0, "step": 705 }, { "epoch": 0.24099675712578938, "grad_norm": 0.31328515710925575, "learning_rate": 4.01023890784983e-05, "loss": 0.3675, "num_tokens": 134662787.0, "step": 706 }, { "epoch": 0.24133811230585425, "grad_norm": 0.2748165519081144, "learning_rate": 4.015927189988624e-05, "loss": 0.3488, "num_tokens": 134862425.0, "step": 707 }, { "epoch": 0.2416794674859191, "grad_norm": 0.34522943854243915, "learning_rate": 4.021615472127418e-05, "loss": 0.4016, "num_tokens": 135040791.0, "step": 708 }, { "epoch": 0.24202082266598396, "grad_norm": 0.3320340073572169, "learning_rate": 4.027303754266212e-05, "loss": 0.3708, "num_tokens": 135229885.0, "step": 709 }, { "epoch": 0.24236217784604883, "grad_norm": 0.3248017390881841, "learning_rate": 4.032992036405006e-05, "loss": 0.3823, "num_tokens": 135393311.0, "step": 710 }, { "epoch": 0.24270353302611367, "grad_norm": 0.2990520321790374, "learning_rate": 4.0386803185438e-05, "loss": 0.3506, "num_tokens": 135578891.0, "step": 711 }, { "epoch": 0.24304488820617853, "grad_norm": 0.3289151885006171, "learning_rate": 4.044368600682594e-05, "loss": 0.3974, "num_tokens": 135747779.0, "step": 712 }, { "epoch": 0.24338624338624337, "grad_norm": 0.3172571371894013, "learning_rate": 4.050056882821388e-05, "loss": 0.3813, "num_tokens": 135909941.0, "step": 713 }, { "epoch": 0.24372759856630824, "grad_norm": 0.2848241879370594, "learning_rate": 4.0557451649601824e-05, "loss": 0.3981, "num_tokens": 136149564.0, "step": 714 }, { "epoch": 0.2440689537463731, "grad_norm": 0.28106599608969973, "learning_rate": 4.0614334470989765e-05, "loss": 0.3546, "num_tokens": 136357376.0, "step": 715 }, { "epoch": 0.24441030892643795, "grad_norm": 0.2791914822471604, "learning_rate": 4.0671217292377706e-05, "loss": 0.3504, "num_tokens": 136580625.0, "step": 716 }, { "epoch": 0.24475166410650281, "grad_norm": 0.306600769564263, "learning_rate": 4.0728100113765646e-05, "loss": 0.4079, "num_tokens": 136794205.0, "step": 717 }, { "epoch": 0.24509301928656768, "grad_norm": 0.290537469409122, "learning_rate": 4.078498293515359e-05, "loss": 0.3727, "num_tokens": 136962604.0, "step": 718 }, { "epoch": 0.24543437446663252, "grad_norm": 0.2947362954261113, "learning_rate": 4.084186575654153e-05, "loss": 0.4073, "num_tokens": 137206592.0, "step": 719 }, { "epoch": 0.2457757296466974, "grad_norm": 0.30079317066636396, "learning_rate": 4.089874857792947e-05, "loss": 0.3605, "num_tokens": 137373584.0, "step": 720 }, { "epoch": 0.24611708482676226, "grad_norm": 0.31844627150885785, "learning_rate": 4.095563139931741e-05, "loss": 0.3703, "num_tokens": 137540006.0, "step": 721 }, { "epoch": 0.2464584400068271, "grad_norm": 0.3096440732581301, "learning_rate": 4.101251422070535e-05, "loss": 0.3905, "num_tokens": 137744924.0, "step": 722 }, { "epoch": 0.24679979518689196, "grad_norm": 0.27651432945952853, "learning_rate": 4.106939704209329e-05, "loss": 0.3862, "num_tokens": 137962496.0, "step": 723 }, { "epoch": 0.24714115036695683, "grad_norm": 0.30816331924922835, "learning_rate": 4.112627986348123e-05, "loss": 0.3686, "num_tokens": 138117214.0, "step": 724 }, { "epoch": 0.24748250554702167, "grad_norm": 0.32014351712323447, "learning_rate": 4.118316268486917e-05, "loss": 0.3897, "num_tokens": 138302209.0, "step": 725 }, { "epoch": 0.24782386072708654, "grad_norm": 0.3029667152641538, "learning_rate": 4.124004550625711e-05, "loss": 0.3681, "num_tokens": 138471463.0, "step": 726 }, { "epoch": 0.2481652159071514, "grad_norm": 0.31759740195714964, "learning_rate": 4.1296928327645054e-05, "loss": 0.3637, "num_tokens": 138661086.0, "step": 727 }, { "epoch": 0.24850657108721624, "grad_norm": 0.28028619686429773, "learning_rate": 4.1353811149032995e-05, "loss": 0.363, "num_tokens": 138849488.0, "step": 728 }, { "epoch": 0.2488479262672811, "grad_norm": 0.3175238704035962, "learning_rate": 4.1410693970420935e-05, "loss": 0.3567, "num_tokens": 139055353.0, "step": 729 }, { "epoch": 0.24918928144734595, "grad_norm": 0.27968248385394084, "learning_rate": 4.1467576791808876e-05, "loss": 0.3746, "num_tokens": 139258734.0, "step": 730 }, { "epoch": 0.24953063662741082, "grad_norm": 0.28774188964674285, "learning_rate": 4.152445961319682e-05, "loss": 0.3677, "num_tokens": 139428079.0, "step": 731 }, { "epoch": 0.24987199180747569, "grad_norm": 0.2937718012618049, "learning_rate": 4.158134243458476e-05, "loss": 0.3915, "num_tokens": 139647856.0, "step": 732 }, { "epoch": 0.25021334698754055, "grad_norm": 0.28411972771606314, "learning_rate": 4.16382252559727e-05, "loss": 0.364, "num_tokens": 139812395.0, "step": 733 }, { "epoch": 0.2505547021676054, "grad_norm": 0.3073141088806838, "learning_rate": 4.169510807736064e-05, "loss": 0.3851, "num_tokens": 139983795.0, "step": 734 }, { "epoch": 0.25089605734767023, "grad_norm": 0.33957335919408993, "learning_rate": 4.175199089874858e-05, "loss": 0.3888, "num_tokens": 140133376.0, "step": 735 }, { "epoch": 0.2512374125277351, "grad_norm": 0.2780706529424463, "learning_rate": 4.180887372013652e-05, "loss": 0.3371, "num_tokens": 140299670.0, "step": 736 }, { "epoch": 0.25157876770779997, "grad_norm": 0.32111710690532846, "learning_rate": 4.186575654152446e-05, "loss": 0.389, "num_tokens": 140502203.0, "step": 737 }, { "epoch": 0.2519201228878648, "grad_norm": 0.2967769180733575, "learning_rate": 4.19226393629124e-05, "loss": 0.3697, "num_tokens": 140712174.0, "step": 738 }, { "epoch": 0.2522614780679297, "grad_norm": 0.3514499431355995, "learning_rate": 4.197952218430034e-05, "loss": 0.3842, "num_tokens": 140841788.0, "step": 739 }, { "epoch": 0.25260283324799454, "grad_norm": 0.3293569722136867, "learning_rate": 4.2036405005688284e-05, "loss": 0.3985, "num_tokens": 141035457.0, "step": 740 }, { "epoch": 0.2529441884280594, "grad_norm": 0.29008679732397574, "learning_rate": 4.2093287827076224e-05, "loss": 0.3839, "num_tokens": 141256613.0, "step": 741 }, { "epoch": 0.2532855436081243, "grad_norm": 0.2892463220216436, "learning_rate": 4.2150170648464165e-05, "loss": 0.3752, "num_tokens": 141502194.0, "step": 742 }, { "epoch": 0.2536268987881891, "grad_norm": 0.27574552436528454, "learning_rate": 4.2207053469852106e-05, "loss": 0.3605, "num_tokens": 141722199.0, "step": 743 }, { "epoch": 0.25396825396825395, "grad_norm": 0.3182416809233448, "learning_rate": 4.2263936291240047e-05, "loss": 0.3488, "num_tokens": 141908389.0, "step": 744 }, { "epoch": 0.25430960914831885, "grad_norm": 0.30481144373382424, "learning_rate": 4.2320819112627994e-05, "loss": 0.431, "num_tokens": 142121086.0, "step": 745 }, { "epoch": 0.2546509643283837, "grad_norm": 0.2939500024876938, "learning_rate": 4.237770193401593e-05, "loss": 0.424, "num_tokens": 142331246.0, "step": 746 }, { "epoch": 0.25499231950844853, "grad_norm": 0.3308788253587497, "learning_rate": 4.2434584755403876e-05, "loss": 0.4069, "num_tokens": 142516564.0, "step": 747 }, { "epoch": 0.2553336746885134, "grad_norm": 0.2737707531379808, "learning_rate": 4.249146757679181e-05, "loss": 0.3753, "num_tokens": 142716823.0, "step": 748 }, { "epoch": 0.25567502986857826, "grad_norm": 0.26869405284905035, "learning_rate": 4.254835039817976e-05, "loss": 0.3505, "num_tokens": 142924220.0, "step": 749 }, { "epoch": 0.2560163850486431, "grad_norm": 0.3177271839663982, "learning_rate": 4.260523321956769e-05, "loss": 0.3987, "num_tokens": 143117057.0, "step": 750 }, { "epoch": 0.256357740228708, "grad_norm": 0.28014330884557903, "learning_rate": 4.266211604095564e-05, "loss": 0.3946, "num_tokens": 143302673.0, "step": 751 }, { "epoch": 0.25669909540877284, "grad_norm": 0.28720019214345555, "learning_rate": 4.271899886234357e-05, "loss": 0.3893, "num_tokens": 143498642.0, "step": 752 }, { "epoch": 0.2570404505888377, "grad_norm": 0.27599180426886544, "learning_rate": 4.277588168373152e-05, "loss": 0.3726, "num_tokens": 143695337.0, "step": 753 }, { "epoch": 0.2573818057689025, "grad_norm": 0.27561318541835983, "learning_rate": 4.2832764505119454e-05, "loss": 0.3299, "num_tokens": 143872258.0, "step": 754 }, { "epoch": 0.2577231609489674, "grad_norm": 0.3017611000213292, "learning_rate": 4.28896473265074e-05, "loss": 0.3681, "num_tokens": 144045104.0, "step": 755 }, { "epoch": 0.25806451612903225, "grad_norm": 0.294483042096815, "learning_rate": 4.2946530147895336e-05, "loss": 0.3961, "num_tokens": 144262084.0, "step": 756 }, { "epoch": 0.2584058713090971, "grad_norm": 0.4001072514079071, "learning_rate": 4.300341296928328e-05, "loss": 0.3671, "num_tokens": 144388634.0, "step": 757 }, { "epoch": 0.258747226489162, "grad_norm": 0.28142331139607213, "learning_rate": 4.306029579067122e-05, "loss": 0.3344, "num_tokens": 144553822.0, "step": 758 }, { "epoch": 0.2590885816692268, "grad_norm": 0.297084319030319, "learning_rate": 4.3117178612059165e-05, "loss": 0.4019, "num_tokens": 144758723.0, "step": 759 }, { "epoch": 0.25942993684929166, "grad_norm": 0.28654327917963957, "learning_rate": 4.31740614334471e-05, "loss": 0.3839, "num_tokens": 144971901.0, "step": 760 }, { "epoch": 0.25977129202935656, "grad_norm": 0.2756843092126658, "learning_rate": 4.3230944254835046e-05, "loss": 0.3617, "num_tokens": 145149128.0, "step": 761 }, { "epoch": 0.2601126472094214, "grad_norm": 0.28078432078537247, "learning_rate": 4.328782707622298e-05, "loss": 0.3571, "num_tokens": 145345121.0, "step": 762 }, { "epoch": 0.26045400238948624, "grad_norm": 0.3049027034062412, "learning_rate": 4.334470989761093e-05, "loss": 0.4018, "num_tokens": 145556464.0, "step": 763 }, { "epoch": 0.26079535756955113, "grad_norm": 0.3420955786929103, "learning_rate": 4.340159271899886e-05, "loss": 0.3747, "num_tokens": 145734605.0, "step": 764 }, { "epoch": 0.261136712749616, "grad_norm": 0.28987142286393863, "learning_rate": 4.345847554038681e-05, "loss": 0.3886, "num_tokens": 145932657.0, "step": 765 }, { "epoch": 0.2614780679296808, "grad_norm": 0.28168045819190535, "learning_rate": 4.351535836177474e-05, "loss": 0.3695, "num_tokens": 146164261.0, "step": 766 }, { "epoch": 0.2618194231097457, "grad_norm": 0.30830383824873975, "learning_rate": 4.357224118316269e-05, "loss": 0.381, "num_tokens": 146354638.0, "step": 767 }, { "epoch": 0.26216077828981055, "grad_norm": 0.28721917404735736, "learning_rate": 4.3629124004550625e-05, "loss": 0.345, "num_tokens": 146518245.0, "step": 768 }, { "epoch": 0.2625021334698754, "grad_norm": 0.29825126710808, "learning_rate": 4.368600682593857e-05, "loss": 0.351, "num_tokens": 146664833.0, "step": 769 }, { "epoch": 0.2628434886499403, "grad_norm": 0.288658135230219, "learning_rate": 4.3742889647326506e-05, "loss": 0.3786, "num_tokens": 146840034.0, "step": 770 }, { "epoch": 0.2631848438300051, "grad_norm": 0.29623900916422236, "learning_rate": 4.3799772468714454e-05, "loss": 0.372, "num_tokens": 147016042.0, "step": 771 }, { "epoch": 0.26352619901006996, "grad_norm": 0.29411978600213073, "learning_rate": 4.385665529010239e-05, "loss": 0.4071, "num_tokens": 147199098.0, "step": 772 }, { "epoch": 0.26386755419013486, "grad_norm": 0.27546358339788474, "learning_rate": 4.3913538111490335e-05, "loss": 0.3735, "num_tokens": 147384445.0, "step": 773 }, { "epoch": 0.2642089093701997, "grad_norm": 0.26202906238940377, "learning_rate": 4.397042093287827e-05, "loss": 0.3506, "num_tokens": 147586056.0, "step": 774 }, { "epoch": 0.26455026455026454, "grad_norm": 0.27168782026963695, "learning_rate": 4.402730375426622e-05, "loss": 0.3587, "num_tokens": 147787516.0, "step": 775 }, { "epoch": 0.26489161973032943, "grad_norm": 0.3073242310090664, "learning_rate": 4.408418657565416e-05, "loss": 0.3844, "num_tokens": 147964128.0, "step": 776 }, { "epoch": 0.26523297491039427, "grad_norm": 0.29074941920282205, "learning_rate": 4.41410693970421e-05, "loss": 0.387, "num_tokens": 148184775.0, "step": 777 }, { "epoch": 0.2655743300904591, "grad_norm": 0.2777811771010441, "learning_rate": 4.419795221843004e-05, "loss": 0.3849, "num_tokens": 148413355.0, "step": 778 }, { "epoch": 0.265915685270524, "grad_norm": 0.3256232504022834, "learning_rate": 4.425483503981798e-05, "loss": 0.3758, "num_tokens": 148570365.0, "step": 779 }, { "epoch": 0.26625704045058884, "grad_norm": 0.32971986188992414, "learning_rate": 4.431171786120592e-05, "loss": 0.4255, "num_tokens": 148767975.0, "step": 780 }, { "epoch": 0.2665983956306537, "grad_norm": 0.2898325267609617, "learning_rate": 4.436860068259386e-05, "loss": 0.4016, "num_tokens": 148966826.0, "step": 781 }, { "epoch": 0.2669397508107186, "grad_norm": 0.32181205073191044, "learning_rate": 4.44254835039818e-05, "loss": 0.4127, "num_tokens": 149177015.0, "step": 782 }, { "epoch": 0.2672811059907834, "grad_norm": 0.26244245813113626, "learning_rate": 4.448236632536974e-05, "loss": 0.368, "num_tokens": 149372830.0, "step": 783 }, { "epoch": 0.26762246117084826, "grad_norm": 0.2921773780101119, "learning_rate": 4.4539249146757683e-05, "loss": 0.3865, "num_tokens": 149580652.0, "step": 784 }, { "epoch": 0.2679638163509131, "grad_norm": 0.2872818484483615, "learning_rate": 4.4596131968145624e-05, "loss": 0.3704, "num_tokens": 149813914.0, "step": 785 }, { "epoch": 0.268305171530978, "grad_norm": 0.279953203186594, "learning_rate": 4.4653014789533565e-05, "loss": 0.3765, "num_tokens": 150018050.0, "step": 786 }, { "epoch": 0.26864652671104283, "grad_norm": 0.27783232255617896, "learning_rate": 4.4709897610921506e-05, "loss": 0.4026, "num_tokens": 150223945.0, "step": 787 }, { "epoch": 0.26898788189110767, "grad_norm": 0.3500112534346218, "learning_rate": 4.4766780432309446e-05, "loss": 0.3894, "num_tokens": 150376185.0, "step": 788 }, { "epoch": 0.26932923707117257, "grad_norm": 0.2941724140361834, "learning_rate": 4.482366325369739e-05, "loss": 0.3712, "num_tokens": 150524809.0, "step": 789 }, { "epoch": 0.2696705922512374, "grad_norm": 0.32505369081005103, "learning_rate": 4.488054607508533e-05, "loss": 0.4198, "num_tokens": 150739592.0, "step": 790 }, { "epoch": 0.27001194743130225, "grad_norm": 0.3378059272857872, "learning_rate": 4.493742889647327e-05, "loss": 0.3705, "num_tokens": 150916765.0, "step": 791 }, { "epoch": 0.27035330261136714, "grad_norm": 0.3021381848742227, "learning_rate": 4.499431171786121e-05, "loss": 0.3561, "num_tokens": 151084393.0, "step": 792 }, { "epoch": 0.270694657791432, "grad_norm": 0.2617964930363487, "learning_rate": 4.505119453924915e-05, "loss": 0.3778, "num_tokens": 151295331.0, "step": 793 }, { "epoch": 0.2710360129714968, "grad_norm": 0.32170750035627893, "learning_rate": 4.510807736063709e-05, "loss": 0.4263, "num_tokens": 151491580.0, "step": 794 }, { "epoch": 0.2713773681515617, "grad_norm": 0.3243263623765664, "learning_rate": 4.516496018202503e-05, "loss": 0.3733, "num_tokens": 151659143.0, "step": 795 }, { "epoch": 0.27171872333162655, "grad_norm": 0.26896723998135635, "learning_rate": 4.522184300341297e-05, "loss": 0.3678, "num_tokens": 151869364.0, "step": 796 }, { "epoch": 0.2720600785116914, "grad_norm": 0.3125343092923597, "learning_rate": 4.527872582480091e-05, "loss": 0.3847, "num_tokens": 152077479.0, "step": 797 }, { "epoch": 0.2724014336917563, "grad_norm": 0.29366979562451, "learning_rate": 4.5335608646188854e-05, "loss": 0.378, "num_tokens": 152281177.0, "step": 798 }, { "epoch": 0.27274278887182113, "grad_norm": 0.29844801599315957, "learning_rate": 4.5392491467576795e-05, "loss": 0.3902, "num_tokens": 152478682.0, "step": 799 }, { "epoch": 0.27308414405188597, "grad_norm": 0.2848480627781522, "learning_rate": 4.5449374288964735e-05, "loss": 0.3598, "num_tokens": 152630833.0, "step": 800 }, { "epoch": 0.27342549923195086, "grad_norm": 0.30347060848497986, "learning_rate": 4.5506257110352676e-05, "loss": 0.3716, "num_tokens": 152867910.0, "step": 801 }, { "epoch": 0.2737668544120157, "grad_norm": 0.29791372185588455, "learning_rate": 4.556313993174062e-05, "loss": 0.3215, "num_tokens": 153024303.0, "step": 802 }, { "epoch": 0.27410820959208054, "grad_norm": 0.29744587378439746, "learning_rate": 4.562002275312856e-05, "loss": 0.3648, "num_tokens": 153194510.0, "step": 803 }, { "epoch": 0.27444956477214544, "grad_norm": 0.32242580597366516, "learning_rate": 4.56769055745165e-05, "loss": 0.3757, "num_tokens": 153384437.0, "step": 804 }, { "epoch": 0.2747909199522103, "grad_norm": 0.29893334767026664, "learning_rate": 4.573378839590444e-05, "loss": 0.3735, "num_tokens": 153557051.0, "step": 805 }, { "epoch": 0.2751322751322751, "grad_norm": 0.28770771848672844, "learning_rate": 4.579067121729238e-05, "loss": 0.3786, "num_tokens": 153765726.0, "step": 806 }, { "epoch": 0.27547363031234, "grad_norm": 0.2946040844741079, "learning_rate": 4.584755403868032e-05, "loss": 0.3722, "num_tokens": 153970724.0, "step": 807 }, { "epoch": 0.27581498549240485, "grad_norm": 0.30380790198295343, "learning_rate": 4.590443686006826e-05, "loss": 0.3758, "num_tokens": 154122692.0, "step": 808 }, { "epoch": 0.2761563406724697, "grad_norm": 0.2851220447357819, "learning_rate": 4.59613196814562e-05, "loss": 0.3739, "num_tokens": 154322409.0, "step": 809 }, { "epoch": 0.2764976958525346, "grad_norm": 0.2835610656912009, "learning_rate": 4.601820250284414e-05, "loss": 0.3751, "num_tokens": 154533654.0, "step": 810 }, { "epoch": 0.2768390510325994, "grad_norm": 0.2867179649129961, "learning_rate": 4.6075085324232084e-05, "loss": 0.3838, "num_tokens": 154759188.0, "step": 811 }, { "epoch": 0.27718040621266427, "grad_norm": 0.3141519758916294, "learning_rate": 4.6131968145620024e-05, "loss": 0.4453, "num_tokens": 154974440.0, "step": 812 }, { "epoch": 0.27752176139272916, "grad_norm": 0.29841634217532403, "learning_rate": 4.6188850967007965e-05, "loss": 0.3587, "num_tokens": 155157961.0, "step": 813 }, { "epoch": 0.277863116572794, "grad_norm": 0.33675416565343624, "learning_rate": 4.6245733788395906e-05, "loss": 0.401, "num_tokens": 155319633.0, "step": 814 }, { "epoch": 0.27820447175285884, "grad_norm": 0.2936436660911248, "learning_rate": 4.630261660978385e-05, "loss": 0.384, "num_tokens": 155518471.0, "step": 815 }, { "epoch": 0.27854582693292373, "grad_norm": 0.30517757180651783, "learning_rate": 4.635949943117179e-05, "loss": 0.3881, "num_tokens": 155689425.0, "step": 816 }, { "epoch": 0.2788871821129886, "grad_norm": 0.3159201616213531, "learning_rate": 4.641638225255973e-05, "loss": 0.4196, "num_tokens": 155882150.0, "step": 817 }, { "epoch": 0.2792285372930534, "grad_norm": 0.3047559839027094, "learning_rate": 4.647326507394767e-05, "loss": 0.3982, "num_tokens": 156101871.0, "step": 818 }, { "epoch": 0.27956989247311825, "grad_norm": 0.29101603638971846, "learning_rate": 4.653014789533561e-05, "loss": 0.3824, "num_tokens": 156311453.0, "step": 819 }, { "epoch": 0.27991124765318315, "grad_norm": 0.31233205137668796, "learning_rate": 4.658703071672355e-05, "loss": 0.3489, "num_tokens": 156497947.0, "step": 820 }, { "epoch": 0.280252602833248, "grad_norm": 0.2893067455139457, "learning_rate": 4.664391353811149e-05, "loss": 0.3652, "num_tokens": 156653249.0, "step": 821 }, { "epoch": 0.2805939580133128, "grad_norm": 0.31518769373430217, "learning_rate": 4.670079635949943e-05, "loss": 0.387, "num_tokens": 156845571.0, "step": 822 }, { "epoch": 0.2809353131933777, "grad_norm": 0.3011529872792471, "learning_rate": 4.675767918088737e-05, "loss": 0.3875, "num_tokens": 157027606.0, "step": 823 }, { "epoch": 0.28127666837344256, "grad_norm": 0.3026301735393091, "learning_rate": 4.6814562002275314e-05, "loss": 0.4094, "num_tokens": 157210641.0, "step": 824 }, { "epoch": 0.2816180235535074, "grad_norm": 0.2839854289609952, "learning_rate": 4.6871444823663254e-05, "loss": 0.3338, "num_tokens": 157390850.0, "step": 825 }, { "epoch": 0.2819593787335723, "grad_norm": 0.2969857898549683, "learning_rate": 4.6928327645051195e-05, "loss": 0.3857, "num_tokens": 157579657.0, "step": 826 }, { "epoch": 0.28230073391363714, "grad_norm": 0.33655122860170655, "learning_rate": 4.6985210466439136e-05, "loss": 0.4175, "num_tokens": 157747149.0, "step": 827 }, { "epoch": 0.282642089093702, "grad_norm": 0.28309460326070224, "learning_rate": 4.7042093287827077e-05, "loss": 0.359, "num_tokens": 157935581.0, "step": 828 }, { "epoch": 0.28298344427376687, "grad_norm": 0.26461231223731274, "learning_rate": 4.709897610921502e-05, "loss": 0.357, "num_tokens": 158135834.0, "step": 829 }, { "epoch": 0.2833247994538317, "grad_norm": 0.32472386135395115, "learning_rate": 4.715585893060296e-05, "loss": 0.3381, "num_tokens": 158328146.0, "step": 830 }, { "epoch": 0.28366615463389655, "grad_norm": 0.27418000861904934, "learning_rate": 4.72127417519909e-05, "loss": 0.3655, "num_tokens": 158517866.0, "step": 831 }, { "epoch": 0.28400750981396145, "grad_norm": 0.2900388832016542, "learning_rate": 4.726962457337884e-05, "loss": 0.414, "num_tokens": 158711312.0, "step": 832 }, { "epoch": 0.2843488649940263, "grad_norm": 0.2869945599096015, "learning_rate": 4.732650739476678e-05, "loss": 0.4057, "num_tokens": 158906091.0, "step": 833 }, { "epoch": 0.2846902201740911, "grad_norm": 0.28741365991164824, "learning_rate": 4.738339021615472e-05, "loss": 0.4132, "num_tokens": 159115008.0, "step": 834 }, { "epoch": 0.285031575354156, "grad_norm": 0.2687431974250292, "learning_rate": 4.744027303754267e-05, "loss": 0.3539, "num_tokens": 159310599.0, "step": 835 }, { "epoch": 0.28537293053422086, "grad_norm": 0.30470305339951975, "learning_rate": 4.74971558589306e-05, "loss": 0.3801, "num_tokens": 159464234.0, "step": 836 }, { "epoch": 0.2857142857142857, "grad_norm": 0.2892983000970236, "learning_rate": 4.755403868031855e-05, "loss": 0.3719, "num_tokens": 159661282.0, "step": 837 }, { "epoch": 0.2860556408943506, "grad_norm": 0.2929821932550833, "learning_rate": 4.7610921501706484e-05, "loss": 0.4349, "num_tokens": 159859656.0, "step": 838 }, { "epoch": 0.28639699607441543, "grad_norm": 0.279453065909252, "learning_rate": 4.766780432309443e-05, "loss": 0.3574, "num_tokens": 160026478.0, "step": 839 }, { "epoch": 0.2867383512544803, "grad_norm": 0.2871206863007229, "learning_rate": 4.7724687144482366e-05, "loss": 0.4147, "num_tokens": 160241340.0, "step": 840 }, { "epoch": 0.28707970643454517, "grad_norm": 0.2924717111005006, "learning_rate": 4.778156996587031e-05, "loss": 0.3871, "num_tokens": 160397938.0, "step": 841 }, { "epoch": 0.28742106161461, "grad_norm": 0.2970648516069964, "learning_rate": 4.783845278725825e-05, "loss": 0.3705, "num_tokens": 160564940.0, "step": 842 }, { "epoch": 0.28776241679467485, "grad_norm": 0.324319797793475, "learning_rate": 4.7895335608646195e-05, "loss": 0.3975, "num_tokens": 160772317.0, "step": 843 }, { "epoch": 0.28810377197473974, "grad_norm": 0.2535927923677306, "learning_rate": 4.795221843003413e-05, "loss": 0.3533, "num_tokens": 160978555.0, "step": 844 }, { "epoch": 0.2884451271548046, "grad_norm": 0.29221551399586493, "learning_rate": 4.8009101251422076e-05, "loss": 0.3725, "num_tokens": 161162397.0, "step": 845 }, { "epoch": 0.2887864823348694, "grad_norm": 1.0201939236252064, "learning_rate": 4.806598407281002e-05, "loss": 0.4078, "num_tokens": 161376419.0, "step": 846 }, { "epoch": 0.2891278375149343, "grad_norm": 0.272595428291083, "learning_rate": 4.812286689419796e-05, "loss": 0.3845, "num_tokens": 161584719.0, "step": 847 }, { "epoch": 0.28946919269499916, "grad_norm": 0.28034703674338207, "learning_rate": 4.81797497155859e-05, "loss": 0.3874, "num_tokens": 161778940.0, "step": 848 }, { "epoch": 0.289810547875064, "grad_norm": 0.3316776503956698, "learning_rate": 4.823663253697384e-05, "loss": 0.3806, "num_tokens": 161958336.0, "step": 849 }, { "epoch": 0.29015190305512883, "grad_norm": 0.30440404257780473, "learning_rate": 4.829351535836178e-05, "loss": 0.3904, "num_tokens": 162136331.0, "step": 850 }, { "epoch": 0.29049325823519373, "grad_norm": 0.2944829007932268, "learning_rate": 4.835039817974972e-05, "loss": 0.3733, "num_tokens": 162317344.0, "step": 851 }, { "epoch": 0.29083461341525857, "grad_norm": 0.2942686208355024, "learning_rate": 4.840728100113766e-05, "loss": 0.4042, "num_tokens": 162530532.0, "step": 852 }, { "epoch": 0.2911759685953234, "grad_norm": 0.3852254721716583, "learning_rate": 4.84641638225256e-05, "loss": 0.3803, "num_tokens": 162720667.0, "step": 853 }, { "epoch": 0.2915173237753883, "grad_norm": 0.30012065990013836, "learning_rate": 4.852104664391354e-05, "loss": 0.3891, "num_tokens": 162892822.0, "step": 854 }, { "epoch": 0.29185867895545314, "grad_norm": 0.27773813406204145, "learning_rate": 4.8577929465301484e-05, "loss": 0.4035, "num_tokens": 163108580.0, "step": 855 }, { "epoch": 0.292200034135518, "grad_norm": 0.2826262596034848, "learning_rate": 4.8634812286689424e-05, "loss": 0.4081, "num_tokens": 163320387.0, "step": 856 }, { "epoch": 0.2925413893155829, "grad_norm": 0.2583489321909449, "learning_rate": 4.8691695108077365e-05, "loss": 0.3817, "num_tokens": 163540561.0, "step": 857 }, { "epoch": 0.2928827444956477, "grad_norm": 0.27817596046666115, "learning_rate": 4.8748577929465306e-05, "loss": 0.3532, "num_tokens": 163769975.0, "step": 858 }, { "epoch": 0.29322409967571256, "grad_norm": 0.2632775361600606, "learning_rate": 4.8805460750853247e-05, "loss": 0.3805, "num_tokens": 163978066.0, "step": 859 }, { "epoch": 0.29356545485577745, "grad_norm": 0.3094566415369196, "learning_rate": 4.886234357224119e-05, "loss": 0.3919, "num_tokens": 164174425.0, "step": 860 }, { "epoch": 0.2939068100358423, "grad_norm": 0.28233735284095235, "learning_rate": 4.891922639362913e-05, "loss": 0.378, "num_tokens": 164354616.0, "step": 861 }, { "epoch": 0.29424816521590713, "grad_norm": 0.2850471407501937, "learning_rate": 4.897610921501707e-05, "loss": 0.3709, "num_tokens": 164563478.0, "step": 862 }, { "epoch": 0.294589520395972, "grad_norm": 0.30999640632834263, "learning_rate": 4.903299203640501e-05, "loss": 0.3648, "num_tokens": 164713681.0, "step": 863 }, { "epoch": 0.29493087557603687, "grad_norm": 0.28420679485085704, "learning_rate": 4.908987485779295e-05, "loss": 0.3736, "num_tokens": 164896990.0, "step": 864 }, { "epoch": 0.2952722307561017, "grad_norm": 0.3548569782982267, "learning_rate": 4.914675767918089e-05, "loss": 0.3537, "num_tokens": 165093394.0, "step": 865 }, { "epoch": 0.2956135859361666, "grad_norm": 0.2673953272996152, "learning_rate": 4.920364050056883e-05, "loss": 0.3617, "num_tokens": 165293290.0, "step": 866 }, { "epoch": 0.29595494111623144, "grad_norm": 0.27447611173214076, "learning_rate": 4.926052332195677e-05, "loss": 0.3801, "num_tokens": 165479740.0, "step": 867 }, { "epoch": 0.2962962962962963, "grad_norm": 0.3040425241411616, "learning_rate": 4.931740614334471e-05, "loss": 0.3972, "num_tokens": 165686054.0, "step": 868 }, { "epoch": 0.2966376514763612, "grad_norm": 0.27185725045607884, "learning_rate": 4.9374288964732654e-05, "loss": 0.3505, "num_tokens": 165871169.0, "step": 869 }, { "epoch": 0.296979006656426, "grad_norm": 0.7234209823269893, "learning_rate": 4.9431171786120595e-05, "loss": 0.3986, "num_tokens": 166059893.0, "step": 870 }, { "epoch": 0.29732036183649085, "grad_norm": 0.27164910695916233, "learning_rate": 4.9488054607508536e-05, "loss": 0.3677, "num_tokens": 166284503.0, "step": 871 }, { "epoch": 0.29766171701655575, "grad_norm": 0.2623562736949539, "learning_rate": 4.9544937428896476e-05, "loss": 0.3768, "num_tokens": 166498343.0, "step": 872 }, { "epoch": 0.2980030721966206, "grad_norm": 0.29942955098592844, "learning_rate": 4.960182025028442e-05, "loss": 0.3748, "num_tokens": 166653122.0, "step": 873 }, { "epoch": 0.29834442737668543, "grad_norm": 0.2824349523276681, "learning_rate": 4.965870307167236e-05, "loss": 0.366, "num_tokens": 166843203.0, "step": 874 }, { "epoch": 0.2986857825567503, "grad_norm": 0.2457348297943073, "learning_rate": 4.97155858930603e-05, "loss": 0.3638, "num_tokens": 167077282.0, "step": 875 }, { "epoch": 0.29902713773681516, "grad_norm": 0.30653825270543816, "learning_rate": 4.977246871444824e-05, "loss": 0.3584, "num_tokens": 167264797.0, "step": 876 }, { "epoch": 0.29936849291688, "grad_norm": 0.30867787893254567, "learning_rate": 4.982935153583618e-05, "loss": 0.4157, "num_tokens": 167437701.0, "step": 877 }, { "epoch": 0.2997098480969449, "grad_norm": 0.27143758300783255, "learning_rate": 4.988623435722412e-05, "loss": 0.36, "num_tokens": 167615031.0, "step": 878 }, { "epoch": 0.30005120327700974, "grad_norm": 0.26053783431349964, "learning_rate": 4.994311717861206e-05, "loss": 0.3716, "num_tokens": 167826988.0, "step": 879 }, { "epoch": 0.3003925584570746, "grad_norm": 0.305594959548382, "learning_rate": 5e-05, "loss": 0.417, "num_tokens": 168029045.0, "step": 880 }, { "epoch": 0.3007339136371394, "grad_norm": 0.3021470349247126, "learning_rate": 4.999367728882145e-05, "loss": 0.4218, "num_tokens": 168242517.0, "step": 881 }, { "epoch": 0.3010752688172043, "grad_norm": 0.2595922079718509, "learning_rate": 4.9987354577642896e-05, "loss": 0.3832, "num_tokens": 168445089.0, "step": 882 }, { "epoch": 0.30141662399726915, "grad_norm": 0.3983445185290601, "learning_rate": 4.9981031866464337e-05, "loss": 0.3537, "num_tokens": 168628487.0, "step": 883 }, { "epoch": 0.301757979177334, "grad_norm": 0.28472309396674783, "learning_rate": 4.997470915528579e-05, "loss": 0.3879, "num_tokens": 168831597.0, "step": 884 }, { "epoch": 0.3020993343573989, "grad_norm": 0.2627666444136737, "learning_rate": 4.996838644410724e-05, "loss": 0.3895, "num_tokens": 169021935.0, "step": 885 }, { "epoch": 0.3024406895374637, "grad_norm": 0.2865083317360197, "learning_rate": 4.9962063732928684e-05, "loss": 0.4118, "num_tokens": 169245865.0, "step": 886 }, { "epoch": 0.30278204471752856, "grad_norm": 0.33021438579625073, "learning_rate": 4.995574102175013e-05, "loss": 0.3882, "num_tokens": 169431235.0, "step": 887 }, { "epoch": 0.30312339989759346, "grad_norm": 0.28398683521653256, "learning_rate": 4.994941831057158e-05, "loss": 0.416, "num_tokens": 169643295.0, "step": 888 }, { "epoch": 0.3034647550776583, "grad_norm": 0.29815503911274066, "learning_rate": 4.994309559939302e-05, "loss": 0.4017, "num_tokens": 169819028.0, "step": 889 }, { "epoch": 0.30380611025772314, "grad_norm": 0.2904214005010795, "learning_rate": 4.9936772888214465e-05, "loss": 0.3673, "num_tokens": 169991471.0, "step": 890 }, { "epoch": 0.30414746543778803, "grad_norm": 0.2987895346004567, "learning_rate": 4.993045017703591e-05, "loss": 0.4133, "num_tokens": 170217659.0, "step": 891 }, { "epoch": 0.3044888206178529, "grad_norm": 0.2888681335864498, "learning_rate": 4.992412746585736e-05, "loss": 0.3714, "num_tokens": 170389025.0, "step": 892 }, { "epoch": 0.3048301757979177, "grad_norm": 0.3156490183448409, "learning_rate": 4.9917804754678806e-05, "loss": 0.4028, "num_tokens": 170548846.0, "step": 893 }, { "epoch": 0.3051715309779826, "grad_norm": 0.3022596645304124, "learning_rate": 4.991148204350026e-05, "loss": 0.3643, "num_tokens": 170743417.0, "step": 894 }, { "epoch": 0.30551288615804745, "grad_norm": 0.26417735601036263, "learning_rate": 4.99051593323217e-05, "loss": 0.3521, "num_tokens": 170917766.0, "step": 895 }, { "epoch": 0.3058542413381123, "grad_norm": 0.33301634541072794, "learning_rate": 4.989883662114315e-05, "loss": 0.3958, "num_tokens": 171120325.0, "step": 896 }, { "epoch": 0.3061955965181772, "grad_norm": 0.27362862906236635, "learning_rate": 4.9892513909964594e-05, "loss": 0.3393, "num_tokens": 171341728.0, "step": 897 }, { "epoch": 0.306536951698242, "grad_norm": 0.2672537111762941, "learning_rate": 4.988619119878604e-05, "loss": 0.3421, "num_tokens": 171527705.0, "step": 898 }, { "epoch": 0.30687830687830686, "grad_norm": 0.3049607886812532, "learning_rate": 4.987986848760749e-05, "loss": 0.382, "num_tokens": 171707399.0, "step": 899 }, { "epoch": 0.30721966205837176, "grad_norm": 0.2858640072026063, "learning_rate": 4.9873545776428935e-05, "loss": 0.4033, "num_tokens": 171933823.0, "step": 900 }, { "epoch": 0.3075610172384366, "grad_norm": 0.27601677416282916, "learning_rate": 4.986722306525038e-05, "loss": 0.3729, "num_tokens": 172165166.0, "step": 901 }, { "epoch": 0.30790237241850144, "grad_norm": 0.3024770395086871, "learning_rate": 4.986090035407183e-05, "loss": 0.3948, "num_tokens": 172339510.0, "step": 902 }, { "epoch": 0.30824372759856633, "grad_norm": 0.25800021655272604, "learning_rate": 4.9854577642893276e-05, "loss": 0.3787, "num_tokens": 172565037.0, "step": 903 }, { "epoch": 0.30858508277863117, "grad_norm": 0.27313793656059693, "learning_rate": 4.984825493171472e-05, "loss": 0.3676, "num_tokens": 172755512.0, "step": 904 }, { "epoch": 0.308926437958696, "grad_norm": 0.29510550954934567, "learning_rate": 4.984193222053617e-05, "loss": 0.3961, "num_tokens": 172967256.0, "step": 905 }, { "epoch": 0.3092677931387609, "grad_norm": 0.31188549325952664, "learning_rate": 4.983560950935762e-05, "loss": 0.3856, "num_tokens": 173129980.0, "step": 906 }, { "epoch": 0.30960914831882574, "grad_norm": 0.2860286889728926, "learning_rate": 4.982928679817906e-05, "loss": 0.3736, "num_tokens": 173317549.0, "step": 907 }, { "epoch": 0.3099505034988906, "grad_norm": 0.29194259510771103, "learning_rate": 4.9822964087000504e-05, "loss": 0.4095, "num_tokens": 173513591.0, "step": 908 }, { "epoch": 0.3102918586789555, "grad_norm": 0.3146248744328213, "learning_rate": 4.981664137582195e-05, "loss": 0.3586, "num_tokens": 173686774.0, "step": 909 }, { "epoch": 0.3106332138590203, "grad_norm": 0.2890923870047794, "learning_rate": 4.98103186646434e-05, "loss": 0.4285, "num_tokens": 173886081.0, "step": 910 }, { "epoch": 0.31097456903908516, "grad_norm": 0.26226523582436756, "learning_rate": 4.980399595346485e-05, "loss": 0.3534, "num_tokens": 174107985.0, "step": 911 }, { "epoch": 0.31131592421915005, "grad_norm": 0.3124426528194895, "learning_rate": 4.97976732422863e-05, "loss": 0.4098, "num_tokens": 174298000.0, "step": 912 }, { "epoch": 0.3116572793992149, "grad_norm": 0.26155255423615753, "learning_rate": 4.979135053110774e-05, "loss": 0.358, "num_tokens": 174489877.0, "step": 913 }, { "epoch": 0.31199863457927973, "grad_norm": 0.2917443659279167, "learning_rate": 4.9785027819929186e-05, "loss": 0.4207, "num_tokens": 174713936.0, "step": 914 }, { "epoch": 0.31233998975934457, "grad_norm": 0.27522262046789325, "learning_rate": 4.977870510875063e-05, "loss": 0.4035, "num_tokens": 174924031.0, "step": 915 }, { "epoch": 0.31268134493940947, "grad_norm": 0.26605024069784033, "learning_rate": 4.977238239757208e-05, "loss": 0.3638, "num_tokens": 175160721.0, "step": 916 }, { "epoch": 0.3130227001194743, "grad_norm": 0.29327262670653154, "learning_rate": 4.976605968639353e-05, "loss": 0.3714, "num_tokens": 175358940.0, "step": 917 }, { "epoch": 0.31336405529953915, "grad_norm": 0.5779958701524818, "learning_rate": 4.9759736975214974e-05, "loss": 0.3612, "num_tokens": 175502814.0, "step": 918 }, { "epoch": 0.31370541047960404, "grad_norm": 0.3013977840003158, "learning_rate": 4.975341426403642e-05, "loss": 0.3837, "num_tokens": 175697806.0, "step": 919 }, { "epoch": 0.3140467656596689, "grad_norm": 0.2884413109470343, "learning_rate": 4.974709155285787e-05, "loss": 0.3828, "num_tokens": 175888190.0, "step": 920 }, { "epoch": 0.3143881208397337, "grad_norm": 0.2801402097281282, "learning_rate": 4.9740768841679315e-05, "loss": 0.4009, "num_tokens": 176115913.0, "step": 921 }, { "epoch": 0.3147294760197986, "grad_norm": 0.2829802859344225, "learning_rate": 4.973444613050076e-05, "loss": 0.3619, "num_tokens": 176282976.0, "step": 922 }, { "epoch": 0.31507083119986345, "grad_norm": 0.25736027431042935, "learning_rate": 4.972812341932221e-05, "loss": 0.3813, "num_tokens": 176494376.0, "step": 923 }, { "epoch": 0.3154121863799283, "grad_norm": 0.2653257551386209, "learning_rate": 4.9721800708143656e-05, "loss": 0.3592, "num_tokens": 176700829.0, "step": 924 }, { "epoch": 0.3157535415599932, "grad_norm": 0.2769720347892331, "learning_rate": 4.9715477996965096e-05, "loss": 0.3622, "num_tokens": 176858997.0, "step": 925 }, { "epoch": 0.31609489674005803, "grad_norm": 0.26403657053897117, "learning_rate": 4.970915528578654e-05, "loss": 0.3673, "num_tokens": 177065191.0, "step": 926 }, { "epoch": 0.31643625192012287, "grad_norm": 0.26931645803893073, "learning_rate": 4.9702832574608e-05, "loss": 0.356, "num_tokens": 177252555.0, "step": 927 }, { "epoch": 0.31677760710018776, "grad_norm": 0.2632167521770642, "learning_rate": 4.9696509863429444e-05, "loss": 0.3685, "num_tokens": 177455882.0, "step": 928 }, { "epoch": 0.3171189622802526, "grad_norm": 0.2747005243964522, "learning_rate": 4.969018715225089e-05, "loss": 0.3704, "num_tokens": 177624060.0, "step": 929 }, { "epoch": 0.31746031746031744, "grad_norm": 0.24817753877222462, "learning_rate": 4.968386444107234e-05, "loss": 0.3555, "num_tokens": 177823957.0, "step": 930 }, { "epoch": 0.31780167264038234, "grad_norm": 0.2842360731776269, "learning_rate": 4.967754172989378e-05, "loss": 0.3921, "num_tokens": 178009588.0, "step": 931 }, { "epoch": 0.3181430278204472, "grad_norm": 0.2824955307448915, "learning_rate": 4.9671219018715225e-05, "loss": 0.3919, "num_tokens": 178193041.0, "step": 932 }, { "epoch": 0.318484383000512, "grad_norm": 0.2742022072852163, "learning_rate": 4.966489630753667e-05, "loss": 0.4032, "num_tokens": 178386055.0, "step": 933 }, { "epoch": 0.3188257381805769, "grad_norm": 0.261166479139818, "learning_rate": 4.965857359635812e-05, "loss": 0.34, "num_tokens": 178571640.0, "step": 934 }, { "epoch": 0.31916709336064175, "grad_norm": 0.2988549487082451, "learning_rate": 4.9652250885179566e-05, "loss": 0.39, "num_tokens": 178762099.0, "step": 935 }, { "epoch": 0.3195084485407066, "grad_norm": 0.27965493597534213, "learning_rate": 4.964592817400101e-05, "loss": 0.3669, "num_tokens": 178912853.0, "step": 936 }, { "epoch": 0.3198498037207715, "grad_norm": 0.2678150334007383, "learning_rate": 4.963960546282246e-05, "loss": 0.4067, "num_tokens": 179117328.0, "step": 937 }, { "epoch": 0.3201911589008363, "grad_norm": 0.2599423965677788, "learning_rate": 4.963328275164391e-05, "loss": 0.3729, "num_tokens": 179309762.0, "step": 938 }, { "epoch": 0.32053251408090117, "grad_norm": 0.25920744335314455, "learning_rate": 4.9626960040465354e-05, "loss": 0.3902, "num_tokens": 179516554.0, "step": 939 }, { "epoch": 0.32087386926096606, "grad_norm": 0.26371864467859824, "learning_rate": 4.96206373292868e-05, "loss": 0.3801, "num_tokens": 179715091.0, "step": 940 }, { "epoch": 0.3212152244410309, "grad_norm": 0.2603260365333197, "learning_rate": 4.961431461810825e-05, "loss": 0.3923, "num_tokens": 179922222.0, "step": 941 }, { "epoch": 0.32155657962109574, "grad_norm": 0.27433065796821904, "learning_rate": 4.9607991906929695e-05, "loss": 0.4006, "num_tokens": 180129899.0, "step": 942 }, { "epoch": 0.32189793480116063, "grad_norm": 0.2738280264719819, "learning_rate": 4.9601669195751135e-05, "loss": 0.3934, "num_tokens": 180306370.0, "step": 943 }, { "epoch": 0.3222392899812255, "grad_norm": 0.27911516913762013, "learning_rate": 4.959534648457259e-05, "loss": 0.3771, "num_tokens": 180486502.0, "step": 944 }, { "epoch": 0.3225806451612903, "grad_norm": 0.2779765603360511, "learning_rate": 4.9589023773394036e-05, "loss": 0.3722, "num_tokens": 180660556.0, "step": 945 }, { "epoch": 0.32292200034135515, "grad_norm": 0.304359393353634, "learning_rate": 4.958270106221548e-05, "loss": 0.4034, "num_tokens": 180835272.0, "step": 946 }, { "epoch": 0.32326335552142005, "grad_norm": 0.26861030062332514, "learning_rate": 4.957637835103693e-05, "loss": 0.3966, "num_tokens": 181080068.0, "step": 947 }, { "epoch": 0.3236047107014849, "grad_norm": 0.28306282386142173, "learning_rate": 4.957005563985838e-05, "loss": 0.4068, "num_tokens": 181294109.0, "step": 948 }, { "epoch": 0.3239460658815497, "grad_norm": 0.24929389136866703, "learning_rate": 4.956373292867982e-05, "loss": 0.3674, "num_tokens": 181495257.0, "step": 949 }, { "epoch": 0.3242874210616146, "grad_norm": 0.2827751075250707, "learning_rate": 4.9557410217501264e-05, "loss": 0.3405, "num_tokens": 181653264.0, "step": 950 }, { "epoch": 0.32462877624167946, "grad_norm": 0.2809278502079261, "learning_rate": 4.955108750632271e-05, "loss": 0.3425, "num_tokens": 181828467.0, "step": 951 }, { "epoch": 0.3249701314217443, "grad_norm": 0.3065146055590122, "learning_rate": 4.954476479514416e-05, "loss": 0.3874, "num_tokens": 182035916.0, "step": 952 }, { "epoch": 0.3253114866018092, "grad_norm": 0.28714768581221106, "learning_rate": 4.9538442083965605e-05, "loss": 0.3831, "num_tokens": 182206629.0, "step": 953 }, { "epoch": 0.32565284178187404, "grad_norm": 0.2568467694773573, "learning_rate": 4.953211937278706e-05, "loss": 0.3724, "num_tokens": 182403795.0, "step": 954 }, { "epoch": 0.3259941969619389, "grad_norm": 0.29881508483949426, "learning_rate": 4.95257966616085e-05, "loss": 0.4059, "num_tokens": 182593781.0, "step": 955 }, { "epoch": 0.32633555214200377, "grad_norm": 0.29575413066781087, "learning_rate": 4.9519473950429946e-05, "loss": 0.3746, "num_tokens": 182763781.0, "step": 956 }, { "epoch": 0.3266769073220686, "grad_norm": 0.26510633783591875, "learning_rate": 4.951315123925139e-05, "loss": 0.35, "num_tokens": 182939878.0, "step": 957 }, { "epoch": 0.32701826250213345, "grad_norm": 0.30398672525482173, "learning_rate": 4.950682852807284e-05, "loss": 0.4074, "num_tokens": 183138070.0, "step": 958 }, { "epoch": 0.32735961768219835, "grad_norm": 0.2722711203861314, "learning_rate": 4.950050581689429e-05, "loss": 0.3539, "num_tokens": 183301046.0, "step": 959 }, { "epoch": 0.3277009728622632, "grad_norm": 0.25813876697398347, "learning_rate": 4.9494183105715734e-05, "loss": 0.4049, "num_tokens": 183539105.0, "step": 960 }, { "epoch": 0.328042328042328, "grad_norm": 0.28351630111410475, "learning_rate": 4.948786039453718e-05, "loss": 0.3966, "num_tokens": 183714717.0, "step": 961 }, { "epoch": 0.3283836832223929, "grad_norm": 0.28748967431449823, "learning_rate": 4.948153768335863e-05, "loss": 0.3706, "num_tokens": 183880892.0, "step": 962 }, { "epoch": 0.32872503840245776, "grad_norm": 0.24906006868151603, "learning_rate": 4.9475214972180075e-05, "loss": 0.3812, "num_tokens": 184084709.0, "step": 963 }, { "epoch": 0.3290663935825226, "grad_norm": 0.29497988535605174, "learning_rate": 4.946889226100152e-05, "loss": 0.3748, "num_tokens": 184271059.0, "step": 964 }, { "epoch": 0.3294077487625875, "grad_norm": 0.24159428963018664, "learning_rate": 4.946256954982297e-05, "loss": 0.3629, "num_tokens": 184498498.0, "step": 965 }, { "epoch": 0.32974910394265233, "grad_norm": 0.27694668664946454, "learning_rate": 4.9456246838644416e-05, "loss": 0.4173, "num_tokens": 184702125.0, "step": 966 }, { "epoch": 0.3300904591227172, "grad_norm": 0.3451073138739836, "learning_rate": 4.9449924127465856e-05, "loss": 0.4003, "num_tokens": 184907999.0, "step": 967 }, { "epoch": 0.33043181430278207, "grad_norm": 0.30209024164830556, "learning_rate": 4.94436014162873e-05, "loss": 0.4038, "num_tokens": 185074018.0, "step": 968 }, { "epoch": 0.3307731694828469, "grad_norm": 0.2812405940695385, "learning_rate": 4.943727870510875e-05, "loss": 0.3902, "num_tokens": 185289586.0, "step": 969 }, { "epoch": 0.33111452466291175, "grad_norm": 0.4806022136772176, "learning_rate": 4.94309559939302e-05, "loss": 0.445, "num_tokens": 185477420.0, "step": 970 }, { "epoch": 0.33145587984297664, "grad_norm": 0.2978875061481236, "learning_rate": 4.942463328275165e-05, "loss": 0.3962, "num_tokens": 185702496.0, "step": 971 }, { "epoch": 0.3317972350230415, "grad_norm": 0.2571263794894636, "learning_rate": 4.94183105715731e-05, "loss": 0.3665, "num_tokens": 185911348.0, "step": 972 }, { "epoch": 0.3321385902031063, "grad_norm": 0.4504735677377567, "learning_rate": 4.941198786039454e-05, "loss": 0.3778, "num_tokens": 186106773.0, "step": 973 }, { "epoch": 0.3324799453831712, "grad_norm": 0.26090133643707736, "learning_rate": 4.9405665149215985e-05, "loss": 0.3991, "num_tokens": 186328825.0, "step": 974 }, { "epoch": 0.33282130056323606, "grad_norm": 0.29858472881614667, "learning_rate": 4.939934243803743e-05, "loss": 0.3871, "num_tokens": 186492039.0, "step": 975 }, { "epoch": 0.3331626557433009, "grad_norm": 0.25570694336734207, "learning_rate": 4.939301972685888e-05, "loss": 0.3604, "num_tokens": 186707117.0, "step": 976 }, { "epoch": 0.33350401092336573, "grad_norm": 0.26510350977846026, "learning_rate": 4.9386697015680326e-05, "loss": 0.3457, "num_tokens": 186913656.0, "step": 977 }, { "epoch": 0.33384536610343063, "grad_norm": 0.31682215050956075, "learning_rate": 4.938037430450177e-05, "loss": 0.4479, "num_tokens": 187106344.0, "step": 978 }, { "epoch": 0.33418672128349547, "grad_norm": 0.26819810505752606, "learning_rate": 4.937405159332322e-05, "loss": 0.3764, "num_tokens": 187306267.0, "step": 979 }, { "epoch": 0.3345280764635603, "grad_norm": 0.3490168125677608, "learning_rate": 4.936772888214467e-05, "loss": 0.4037, "num_tokens": 187483652.0, "step": 980 }, { "epoch": 0.3348694316436252, "grad_norm": 0.26814259266961754, "learning_rate": 4.9361406170966114e-05, "loss": 0.3809, "num_tokens": 187684533.0, "step": 981 }, { "epoch": 0.33521078682369004, "grad_norm": 0.30203864370871014, "learning_rate": 4.935508345978756e-05, "loss": 0.4019, "num_tokens": 187866216.0, "step": 982 }, { "epoch": 0.3355521420037549, "grad_norm": 0.2827608099649667, "learning_rate": 4.934876074860901e-05, "loss": 0.3717, "num_tokens": 188048598.0, "step": 983 }, { "epoch": 0.3358934971838198, "grad_norm": 0.27914589507057425, "learning_rate": 4.9342438037430455e-05, "loss": 0.3523, "num_tokens": 188213649.0, "step": 984 }, { "epoch": 0.3362348523638846, "grad_norm": 0.33396103811618494, "learning_rate": 4.9336115326251895e-05, "loss": 0.3843, "num_tokens": 188404072.0, "step": 985 }, { "epoch": 0.33657620754394946, "grad_norm": 0.25322662088047143, "learning_rate": 4.932979261507334e-05, "loss": 0.3703, "num_tokens": 188645804.0, "step": 986 }, { "epoch": 0.33691756272401435, "grad_norm": 0.29654185752504464, "learning_rate": 4.932346990389479e-05, "loss": 0.3945, "num_tokens": 188821623.0, "step": 987 }, { "epoch": 0.3372589179040792, "grad_norm": 0.27894386652743547, "learning_rate": 4.931714719271624e-05, "loss": 0.3752, "num_tokens": 189002486.0, "step": 988 }, { "epoch": 0.33760027308414403, "grad_norm": 0.265848281485721, "learning_rate": 4.931082448153769e-05, "loss": 0.358, "num_tokens": 189183860.0, "step": 989 }, { "epoch": 0.3379416282642089, "grad_norm": 0.3514517637156316, "learning_rate": 4.930450177035914e-05, "loss": 0.3922, "num_tokens": 189363571.0, "step": 990 }, { "epoch": 0.33828298344427377, "grad_norm": 0.2704717657523743, "learning_rate": 4.929817905918058e-05, "loss": 0.3857, "num_tokens": 189521836.0, "step": 991 }, { "epoch": 0.3386243386243386, "grad_norm": 0.2533934634694538, "learning_rate": 4.9291856348002024e-05, "loss": 0.3508, "num_tokens": 189711296.0, "step": 992 }, { "epoch": 0.3389656938044035, "grad_norm": 0.2914988427110772, "learning_rate": 4.928553363682347e-05, "loss": 0.3942, "num_tokens": 189901307.0, "step": 993 }, { "epoch": 0.33930704898446834, "grad_norm": 0.2949858848553957, "learning_rate": 4.927921092564492e-05, "loss": 0.3846, "num_tokens": 190086568.0, "step": 994 }, { "epoch": 0.3396484041645332, "grad_norm": 0.23235005595523137, "learning_rate": 4.9272888214466365e-05, "loss": 0.3915, "num_tokens": 190344278.0, "step": 995 }, { "epoch": 0.3399897593445981, "grad_norm": 0.2565255965905156, "learning_rate": 4.926656550328781e-05, "loss": 0.3749, "num_tokens": 190556536.0, "step": 996 }, { "epoch": 0.3403311145246629, "grad_norm": 0.289520652492843, "learning_rate": 4.926024279210926e-05, "loss": 0.3794, "num_tokens": 190742124.0, "step": 997 }, { "epoch": 0.34067246970472775, "grad_norm": 0.25434382932979066, "learning_rate": 4.9253920080930706e-05, "loss": 0.3726, "num_tokens": 190929900.0, "step": 998 }, { "epoch": 0.34101382488479265, "grad_norm": 0.2556656282483584, "learning_rate": 4.924759736975215e-05, "loss": 0.393, "num_tokens": 191120680.0, "step": 999 }, { "epoch": 0.3413551800648575, "grad_norm": 0.2579520485325576, "learning_rate": 4.92412746585736e-05, "loss": 0.3719, "num_tokens": 191296064.0, "step": 1000 }, { "epoch": 0.34169653524492233, "grad_norm": 0.2833925370247385, "learning_rate": 4.923495194739505e-05, "loss": 0.3604, "num_tokens": 191445615.0, "step": 1001 }, { "epoch": 0.3420378904249872, "grad_norm": 0.2591298037552647, "learning_rate": 4.9228629236216494e-05, "loss": 0.3942, "num_tokens": 191681554.0, "step": 1002 }, { "epoch": 0.34237924560505206, "grad_norm": 0.24990194143748234, "learning_rate": 4.9222306525037934e-05, "loss": 0.3634, "num_tokens": 191904080.0, "step": 1003 }, { "epoch": 0.3427206007851169, "grad_norm": 0.26711473796515917, "learning_rate": 4.921598381385939e-05, "loss": 0.3835, "num_tokens": 192089607.0, "step": 1004 }, { "epoch": 0.3430619559651818, "grad_norm": 0.27041972292882327, "learning_rate": 4.9209661102680835e-05, "loss": 0.386, "num_tokens": 192284667.0, "step": 1005 }, { "epoch": 0.34340331114524664, "grad_norm": 0.25201776923107716, "learning_rate": 4.920333839150228e-05, "loss": 0.3677, "num_tokens": 192476958.0, "step": 1006 }, { "epoch": 0.3437446663253115, "grad_norm": 0.2769468732733714, "learning_rate": 4.919701568032373e-05, "loss": 0.3836, "num_tokens": 192650567.0, "step": 1007 }, { "epoch": 0.34408602150537637, "grad_norm": 0.29083875283978394, "learning_rate": 4.9190692969145176e-05, "loss": 0.409, "num_tokens": 192824013.0, "step": 1008 }, { "epoch": 0.3444273766854412, "grad_norm": 0.26544265668066025, "learning_rate": 4.9184370257966616e-05, "loss": 0.3818, "num_tokens": 193017308.0, "step": 1009 }, { "epoch": 0.34476873186550605, "grad_norm": 0.27203751350768546, "learning_rate": 4.917804754678806e-05, "loss": 0.3661, "num_tokens": 193197758.0, "step": 1010 }, { "epoch": 0.3451100870455709, "grad_norm": 0.2586266835907114, "learning_rate": 4.917172483560951e-05, "loss": 0.3692, "num_tokens": 193384910.0, "step": 1011 }, { "epoch": 0.3454514422256358, "grad_norm": 0.2705730561776927, "learning_rate": 4.916540212443096e-05, "loss": 0.3804, "num_tokens": 193591694.0, "step": 1012 }, { "epoch": 0.3457927974057006, "grad_norm": 0.2884826906808621, "learning_rate": 4.9159079413252404e-05, "loss": 0.3746, "num_tokens": 193783659.0, "step": 1013 }, { "epoch": 0.34613415258576546, "grad_norm": 0.26425295581754377, "learning_rate": 4.915275670207385e-05, "loss": 0.394, "num_tokens": 193969828.0, "step": 1014 }, { "epoch": 0.34647550776583036, "grad_norm": 0.2784405748352311, "learning_rate": 4.91464339908953e-05, "loss": 0.3989, "num_tokens": 194134395.0, "step": 1015 }, { "epoch": 0.3468168629458952, "grad_norm": 0.25843099884206877, "learning_rate": 4.9140111279716745e-05, "loss": 0.366, "num_tokens": 194342880.0, "step": 1016 }, { "epoch": 0.34715821812596004, "grad_norm": 0.27808030545098916, "learning_rate": 4.913378856853819e-05, "loss": 0.4203, "num_tokens": 194533827.0, "step": 1017 }, { "epoch": 0.34749957330602493, "grad_norm": 0.28373435565445415, "learning_rate": 4.912746585735964e-05, "loss": 0.3969, "num_tokens": 194750320.0, "step": 1018 }, { "epoch": 0.3478409284860898, "grad_norm": 0.25070744719781973, "learning_rate": 4.9121143146181086e-05, "loss": 0.3609, "num_tokens": 194948327.0, "step": 1019 }, { "epoch": 0.3481822836661546, "grad_norm": 0.2519576029466643, "learning_rate": 4.9114820435002526e-05, "loss": 0.3628, "num_tokens": 195144026.0, "step": 1020 }, { "epoch": 0.3485236388462195, "grad_norm": 0.25756285511257815, "learning_rate": 4.910849772382398e-05, "loss": 0.356, "num_tokens": 195376173.0, "step": 1021 }, { "epoch": 0.34886499402628435, "grad_norm": 0.2756911175360894, "learning_rate": 4.9102175012645427e-05, "loss": 0.3941, "num_tokens": 195564714.0, "step": 1022 }, { "epoch": 0.3492063492063492, "grad_norm": 0.262296030890485, "learning_rate": 4.9095852301466874e-05, "loss": 0.3529, "num_tokens": 195739010.0, "step": 1023 }, { "epoch": 0.3495477043864141, "grad_norm": 0.2495697282122731, "learning_rate": 4.908952959028832e-05, "loss": 0.3617, "num_tokens": 195910136.0, "step": 1024 }, { "epoch": 0.3498890595664789, "grad_norm": 0.27114995967741173, "learning_rate": 4.908320687910977e-05, "loss": 0.3825, "num_tokens": 196106552.0, "step": 1025 }, { "epoch": 0.35023041474654376, "grad_norm": 0.25405883273136576, "learning_rate": 4.907688416793121e-05, "loss": 0.348, "num_tokens": 196289759.0, "step": 1026 }, { "epoch": 0.35057176992660866, "grad_norm": 0.24979634374187332, "learning_rate": 4.9070561456752655e-05, "loss": 0.3551, "num_tokens": 196477016.0, "step": 1027 }, { "epoch": 0.3509131251066735, "grad_norm": 0.27748642377166727, "learning_rate": 4.90642387455741e-05, "loss": 0.4112, "num_tokens": 196669066.0, "step": 1028 }, { "epoch": 0.35125448028673834, "grad_norm": 0.252251994954191, "learning_rate": 4.905791603439555e-05, "loss": 0.3604, "num_tokens": 196847948.0, "step": 1029 }, { "epoch": 0.35159583546680323, "grad_norm": 0.2603370957581134, "learning_rate": 4.9051593323216996e-05, "loss": 0.3672, "num_tokens": 197028705.0, "step": 1030 }, { "epoch": 0.35193719064686807, "grad_norm": 0.2883190255860796, "learning_rate": 4.904527061203845e-05, "loss": 0.4054, "num_tokens": 197208967.0, "step": 1031 }, { "epoch": 0.3522785458269329, "grad_norm": 0.2725274974834197, "learning_rate": 4.903894790085989e-05, "loss": 0.4039, "num_tokens": 197434836.0, "step": 1032 }, { "epoch": 0.3526199010069978, "grad_norm": 0.27642149280910017, "learning_rate": 4.9032625189681337e-05, "loss": 0.3707, "num_tokens": 197621851.0, "step": 1033 }, { "epoch": 0.35296125618706264, "grad_norm": 0.3547884626058784, "learning_rate": 4.9026302478502784e-05, "loss": 0.3765, "num_tokens": 197825295.0, "step": 1034 }, { "epoch": 0.3533026113671275, "grad_norm": 0.2591407894904427, "learning_rate": 4.901997976732423e-05, "loss": 0.3545, "num_tokens": 198005468.0, "step": 1035 }, { "epoch": 0.3536439665471924, "grad_norm": 0.296662614492229, "learning_rate": 4.901365705614568e-05, "loss": 0.3834, "num_tokens": 198198708.0, "step": 1036 }, { "epoch": 0.3539853217272572, "grad_norm": 0.2689783418914486, "learning_rate": 4.9007334344967125e-05, "loss": 0.3555, "num_tokens": 198375648.0, "step": 1037 }, { "epoch": 0.35432667690732206, "grad_norm": 0.26127231260135486, "learning_rate": 4.900101163378857e-05, "loss": 0.338, "num_tokens": 198564026.0, "step": 1038 }, { "epoch": 0.35466803208738695, "grad_norm": 0.24588521346244488, "learning_rate": 4.899468892261002e-05, "loss": 0.3647, "num_tokens": 198768026.0, "step": 1039 }, { "epoch": 0.3550093872674518, "grad_norm": 0.23708021350339883, "learning_rate": 4.8988366211431465e-05, "loss": 0.378, "num_tokens": 198986148.0, "step": 1040 }, { "epoch": 0.35535074244751663, "grad_norm": 0.27574482008198886, "learning_rate": 4.898204350025291e-05, "loss": 0.3859, "num_tokens": 199203950.0, "step": 1041 }, { "epoch": 0.35569209762758147, "grad_norm": 0.30290099699895084, "learning_rate": 4.897572078907436e-05, "loss": 0.4095, "num_tokens": 199356072.0, "step": 1042 }, { "epoch": 0.35603345280764637, "grad_norm": 0.24729555163656278, "learning_rate": 4.8969398077895806e-05, "loss": 0.3605, "num_tokens": 199588155.0, "step": 1043 }, { "epoch": 0.3563748079877112, "grad_norm": 0.24434296647282194, "learning_rate": 4.896307536671725e-05, "loss": 0.349, "num_tokens": 199761795.0, "step": 1044 }, { "epoch": 0.35671616316777605, "grad_norm": 0.29526791701192756, "learning_rate": 4.8956752655538694e-05, "loss": 0.4038, "num_tokens": 199917741.0, "step": 1045 }, { "epoch": 0.35705751834784094, "grad_norm": 0.2909011107425244, "learning_rate": 4.895042994436014e-05, "loss": 0.3911, "num_tokens": 200109713.0, "step": 1046 }, { "epoch": 0.3573988735279058, "grad_norm": 0.27943811890223696, "learning_rate": 4.894410723318159e-05, "loss": 0.4009, "num_tokens": 200312962.0, "step": 1047 }, { "epoch": 0.3577402287079706, "grad_norm": 0.30291134183763335, "learning_rate": 4.893778452200304e-05, "loss": 0.4622, "num_tokens": 200503696.0, "step": 1048 }, { "epoch": 0.3580815838880355, "grad_norm": 0.27826620312104733, "learning_rate": 4.893146181082449e-05, "loss": 0.4027, "num_tokens": 200694571.0, "step": 1049 }, { "epoch": 0.35842293906810035, "grad_norm": 0.270013279445057, "learning_rate": 4.892513909964593e-05, "loss": 0.3902, "num_tokens": 200867966.0, "step": 1050 }, { "epoch": 0.3587642942481652, "grad_norm": 0.29713686492257063, "learning_rate": 4.8918816388467376e-05, "loss": 0.3732, "num_tokens": 201037085.0, "step": 1051 }, { "epoch": 0.3591056494282301, "grad_norm": 0.2626858384614575, "learning_rate": 4.891249367728882e-05, "loss": 0.3595, "num_tokens": 201206056.0, "step": 1052 }, { "epoch": 0.35944700460829493, "grad_norm": 0.295411755467436, "learning_rate": 4.890617096611027e-05, "loss": 0.4136, "num_tokens": 201415365.0, "step": 1053 }, { "epoch": 0.35978835978835977, "grad_norm": 0.31710817839657685, "learning_rate": 4.8899848254931716e-05, "loss": 0.3701, "num_tokens": 201590210.0, "step": 1054 }, { "epoch": 0.36012971496842466, "grad_norm": 0.2798096924104606, "learning_rate": 4.8893525543753163e-05, "loss": 0.3764, "num_tokens": 201768429.0, "step": 1055 }, { "epoch": 0.3604710701484895, "grad_norm": 0.2947110939951046, "learning_rate": 4.888720283257461e-05, "loss": 0.4074, "num_tokens": 201957584.0, "step": 1056 }, { "epoch": 0.36081242532855434, "grad_norm": 0.29873884510499826, "learning_rate": 4.888088012139606e-05, "loss": 0.3936, "num_tokens": 202141230.0, "step": 1057 }, { "epoch": 0.36115378050861924, "grad_norm": 0.29614501793428455, "learning_rate": 4.8874557410217504e-05, "loss": 0.3893, "num_tokens": 202340364.0, "step": 1058 }, { "epoch": 0.3614951356886841, "grad_norm": 0.30854706730581927, "learning_rate": 4.886823469903895e-05, "loss": 0.3547, "num_tokens": 202480774.0, "step": 1059 }, { "epoch": 0.3618364908687489, "grad_norm": 0.29471620596160675, "learning_rate": 4.88619119878604e-05, "loss": 0.3548, "num_tokens": 202667697.0, "step": 1060 }, { "epoch": 0.3621778460488138, "grad_norm": 0.3079941658439344, "learning_rate": 4.8855589276681845e-05, "loss": 0.379, "num_tokens": 202842943.0, "step": 1061 }, { "epoch": 0.36251920122887865, "grad_norm": 0.2968844958264309, "learning_rate": 4.8849266565503286e-05, "loss": 0.3718, "num_tokens": 202992390.0, "step": 1062 }, { "epoch": 0.3628605564089435, "grad_norm": 0.30177940722258817, "learning_rate": 4.884294385432473e-05, "loss": 0.3851, "num_tokens": 203132963.0, "step": 1063 }, { "epoch": 0.3632019115890084, "grad_norm": 0.24700289902883393, "learning_rate": 4.883662114314618e-05, "loss": 0.3742, "num_tokens": 203335079.0, "step": 1064 }, { "epoch": 0.3635432667690732, "grad_norm": 0.28546148147468825, "learning_rate": 4.883029843196763e-05, "loss": 0.3807, "num_tokens": 203536993.0, "step": 1065 }, { "epoch": 0.36388462194913807, "grad_norm": 0.2686139157817988, "learning_rate": 4.882397572078908e-05, "loss": 0.3519, "num_tokens": 203714534.0, "step": 1066 }, { "epoch": 0.36422597712920296, "grad_norm": 0.26646664268455794, "learning_rate": 4.881765300961053e-05, "loss": 0.3903, "num_tokens": 203914166.0, "step": 1067 }, { "epoch": 0.3645673323092678, "grad_norm": 0.2719300636982518, "learning_rate": 4.881133029843197e-05, "loss": 0.3849, "num_tokens": 204112694.0, "step": 1068 }, { "epoch": 0.36490868748933264, "grad_norm": 0.28913456693861567, "learning_rate": 4.8805007587253414e-05, "loss": 0.3546, "num_tokens": 204345318.0, "step": 1069 }, { "epoch": 0.36525004266939753, "grad_norm": 0.2968980686700756, "learning_rate": 4.879868487607486e-05, "loss": 0.4168, "num_tokens": 204527538.0, "step": 1070 }, { "epoch": 0.3655913978494624, "grad_norm": 0.290198698995305, "learning_rate": 4.879236216489631e-05, "loss": 0.3855, "num_tokens": 204745971.0, "step": 1071 }, { "epoch": 0.3659327530295272, "grad_norm": 0.3326636918450848, "learning_rate": 4.8786039453717755e-05, "loss": 0.3783, "num_tokens": 204929351.0, "step": 1072 }, { "epoch": 0.36627410820959205, "grad_norm": 0.2781138410156981, "learning_rate": 4.87797167425392e-05, "loss": 0.3823, "num_tokens": 205115526.0, "step": 1073 }, { "epoch": 0.36661546338965695, "grad_norm": 0.2802700513989119, "learning_rate": 4.877339403136065e-05, "loss": 0.3581, "num_tokens": 205294286.0, "step": 1074 }, { "epoch": 0.3669568185697218, "grad_norm": 0.295496115471746, "learning_rate": 4.8767071320182096e-05, "loss": 0.4147, "num_tokens": 205485346.0, "step": 1075 }, { "epoch": 0.3672981737497866, "grad_norm": 0.25675504263687066, "learning_rate": 4.876074860900354e-05, "loss": 0.3577, "num_tokens": 205712707.0, "step": 1076 }, { "epoch": 0.3676395289298515, "grad_norm": 0.28062861894803015, "learning_rate": 4.875442589782499e-05, "loss": 0.394, "num_tokens": 205919949.0, "step": 1077 }, { "epoch": 0.36798088410991636, "grad_norm": 0.27442206490822496, "learning_rate": 4.874810318664644e-05, "loss": 0.383, "num_tokens": 206151550.0, "step": 1078 }, { "epoch": 0.3683222392899812, "grad_norm": 0.26451426812960555, "learning_rate": 4.8741780475467884e-05, "loss": 0.363, "num_tokens": 206316461.0, "step": 1079 }, { "epoch": 0.3686635944700461, "grad_norm": 0.2741114187912249, "learning_rate": 4.8735457764289324e-05, "loss": 0.3776, "num_tokens": 206517331.0, "step": 1080 }, { "epoch": 0.36900494965011094, "grad_norm": 0.28341548498618396, "learning_rate": 4.872913505311078e-05, "loss": 0.3955, "num_tokens": 206711878.0, "step": 1081 }, { "epoch": 0.3693463048301758, "grad_norm": 0.2701112660575927, "learning_rate": 4.8722812341932225e-05, "loss": 0.3914, "num_tokens": 206909858.0, "step": 1082 }, { "epoch": 0.36968766001024067, "grad_norm": 0.2504943198744966, "learning_rate": 4.871648963075367e-05, "loss": 0.3261, "num_tokens": 207112629.0, "step": 1083 }, { "epoch": 0.3700290151903055, "grad_norm": 0.27992158404920153, "learning_rate": 4.871016691957512e-05, "loss": 0.4003, "num_tokens": 207297737.0, "step": 1084 }, { "epoch": 0.37037037037037035, "grad_norm": 0.26746648308803206, "learning_rate": 4.8703844208396566e-05, "loss": 0.3982, "num_tokens": 207499089.0, "step": 1085 }, { "epoch": 0.37071172555043524, "grad_norm": 0.26475450283511986, "learning_rate": 4.8697521497218006e-05, "loss": 0.3971, "num_tokens": 207699620.0, "step": 1086 }, { "epoch": 0.3710530807305001, "grad_norm": 0.3118185519024268, "learning_rate": 4.869119878603945e-05, "loss": 0.3814, "num_tokens": 207884022.0, "step": 1087 }, { "epoch": 0.3713944359105649, "grad_norm": 0.22991696910642023, "learning_rate": 4.86848760748609e-05, "loss": 0.3469, "num_tokens": 208114912.0, "step": 1088 }, { "epoch": 0.3717357910906298, "grad_norm": 0.2465953316539884, "learning_rate": 4.867855336368235e-05, "loss": 0.3824, "num_tokens": 208342837.0, "step": 1089 }, { "epoch": 0.37207714627069466, "grad_norm": 0.24895105633716014, "learning_rate": 4.8672230652503794e-05, "loss": 0.3604, "num_tokens": 208533129.0, "step": 1090 }, { "epoch": 0.3724185014507595, "grad_norm": 0.2541018909618809, "learning_rate": 4.866590794132525e-05, "loss": 0.3603, "num_tokens": 208718534.0, "step": 1091 }, { "epoch": 0.3727598566308244, "grad_norm": 0.254567685268941, "learning_rate": 4.865958523014669e-05, "loss": 0.3847, "num_tokens": 208901253.0, "step": 1092 }, { "epoch": 0.37310121181088923, "grad_norm": 0.2849051383108046, "learning_rate": 4.8653262518968135e-05, "loss": 0.3977, "num_tokens": 209093423.0, "step": 1093 }, { "epoch": 0.3734425669909541, "grad_norm": 0.25701337076012165, "learning_rate": 4.864693980778958e-05, "loss": 0.3563, "num_tokens": 209241570.0, "step": 1094 }, { "epoch": 0.37378392217101897, "grad_norm": 0.24076500858263247, "learning_rate": 4.864061709661103e-05, "loss": 0.3627, "num_tokens": 209428774.0, "step": 1095 }, { "epoch": 0.3741252773510838, "grad_norm": 0.2686105267145078, "learning_rate": 4.8634294385432476e-05, "loss": 0.4114, "num_tokens": 209612773.0, "step": 1096 }, { "epoch": 0.37446663253114865, "grad_norm": 0.28382379405787106, "learning_rate": 4.862797167425392e-05, "loss": 0.3473, "num_tokens": 209753905.0, "step": 1097 }, { "epoch": 0.37480798771121354, "grad_norm": 0.2419137502891231, "learning_rate": 4.862164896307537e-05, "loss": 0.3645, "num_tokens": 209967774.0, "step": 1098 }, { "epoch": 0.3751493428912784, "grad_norm": 0.2617327183918542, "learning_rate": 4.861532625189682e-05, "loss": 0.403, "num_tokens": 210147827.0, "step": 1099 }, { "epoch": 0.3754906980713432, "grad_norm": 0.2557897865390249, "learning_rate": 4.8609003540718264e-05, "loss": 0.3793, "num_tokens": 210339540.0, "step": 1100 }, { "epoch": 0.3758320532514081, "grad_norm": 0.2799944959900646, "learning_rate": 4.860268082953971e-05, "loss": 0.3778, "num_tokens": 210494349.0, "step": 1101 }, { "epoch": 0.37617340843147296, "grad_norm": 0.23783910782678153, "learning_rate": 4.859635811836116e-05, "loss": 0.3717, "num_tokens": 210698980.0, "step": 1102 }, { "epoch": 0.3765147636115378, "grad_norm": 0.2679683439174702, "learning_rate": 4.8590035407182605e-05, "loss": 0.4109, "num_tokens": 210897125.0, "step": 1103 }, { "epoch": 0.3768561187916027, "grad_norm": 0.2736742084820104, "learning_rate": 4.8583712696004045e-05, "loss": 0.3614, "num_tokens": 211038127.0, "step": 1104 }, { "epoch": 0.37719747397166753, "grad_norm": 0.25753281664155697, "learning_rate": 4.857738998482549e-05, "loss": 0.3884, "num_tokens": 211236157.0, "step": 1105 }, { "epoch": 0.37753882915173237, "grad_norm": 0.2763446308075203, "learning_rate": 4.857106727364694e-05, "loss": 0.3824, "num_tokens": 211423650.0, "step": 1106 }, { "epoch": 0.3778801843317972, "grad_norm": 0.2602700382646051, "learning_rate": 4.8564744562468386e-05, "loss": 0.3839, "num_tokens": 211625035.0, "step": 1107 }, { "epoch": 0.3782215395118621, "grad_norm": 0.2590313900656675, "learning_rate": 4.855842185128984e-05, "loss": 0.3635, "num_tokens": 211824534.0, "step": 1108 }, { "epoch": 0.37856289469192694, "grad_norm": 0.27133729160197656, "learning_rate": 4.855209914011129e-05, "loss": 0.3944, "num_tokens": 212009408.0, "step": 1109 }, { "epoch": 0.3789042498719918, "grad_norm": 0.2589054268577374, "learning_rate": 4.854577642893273e-05, "loss": 0.381, "num_tokens": 212197049.0, "step": 1110 }, { "epoch": 0.3792456050520567, "grad_norm": 0.2486432611951946, "learning_rate": 4.8539453717754174e-05, "loss": 0.3643, "num_tokens": 212408311.0, "step": 1111 }, { "epoch": 0.3795869602321215, "grad_norm": 0.28906241257355564, "learning_rate": 4.853313100657562e-05, "loss": 0.3994, "num_tokens": 212581944.0, "step": 1112 }, { "epoch": 0.37992831541218636, "grad_norm": 0.2893400207235492, "learning_rate": 4.852680829539707e-05, "loss": 0.3917, "num_tokens": 212748544.0, "step": 1113 }, { "epoch": 0.38026967059225125, "grad_norm": 0.2586778710270099, "learning_rate": 4.8520485584218515e-05, "loss": 0.3471, "num_tokens": 212921452.0, "step": 1114 }, { "epoch": 0.3806110257723161, "grad_norm": 0.2759003829957883, "learning_rate": 4.851416287303996e-05, "loss": 0.3946, "num_tokens": 213113365.0, "step": 1115 }, { "epoch": 0.38095238095238093, "grad_norm": 0.2904902689310191, "learning_rate": 4.850784016186141e-05, "loss": 0.3715, "num_tokens": 213265075.0, "step": 1116 }, { "epoch": 0.3812937361324458, "grad_norm": 0.2627112415330019, "learning_rate": 4.8501517450682856e-05, "loss": 0.3911, "num_tokens": 213448508.0, "step": 1117 }, { "epoch": 0.38163509131251067, "grad_norm": 0.2692043712544127, "learning_rate": 4.84951947395043e-05, "loss": 0.3927, "num_tokens": 213653243.0, "step": 1118 }, { "epoch": 0.3819764464925755, "grad_norm": 0.25912607946237687, "learning_rate": 4.848887202832575e-05, "loss": 0.3972, "num_tokens": 213871807.0, "step": 1119 }, { "epoch": 0.3823178016726404, "grad_norm": 0.29205229621440587, "learning_rate": 4.84825493171472e-05, "loss": 0.3816, "num_tokens": 214009093.0, "step": 1120 }, { "epoch": 0.38265915685270524, "grad_norm": 0.2464742050804405, "learning_rate": 4.8476226605968644e-05, "loss": 0.3632, "num_tokens": 214230562.0, "step": 1121 }, { "epoch": 0.3830005120327701, "grad_norm": 0.27866946778581336, "learning_rate": 4.8469903894790084e-05, "loss": 0.3726, "num_tokens": 214418364.0, "step": 1122 }, { "epoch": 0.383341867212835, "grad_norm": 0.24131909803456167, "learning_rate": 4.846358118361153e-05, "loss": 0.3704, "num_tokens": 214645504.0, "step": 1123 }, { "epoch": 0.3836832223928998, "grad_norm": 0.2669873316340354, "learning_rate": 4.845725847243298e-05, "loss": 0.4142, "num_tokens": 214822528.0, "step": 1124 }, { "epoch": 0.38402457757296465, "grad_norm": 0.27246055560362603, "learning_rate": 4.845093576125443e-05, "loss": 0.4126, "num_tokens": 215016672.0, "step": 1125 }, { "epoch": 0.38436593275302955, "grad_norm": 0.29206258055892736, "learning_rate": 4.844461305007588e-05, "loss": 0.4062, "num_tokens": 215197872.0, "step": 1126 }, { "epoch": 0.3847072879330944, "grad_norm": 0.26018026207213096, "learning_rate": 4.8438290338897326e-05, "loss": 0.3945, "num_tokens": 215379518.0, "step": 1127 }, { "epoch": 0.38504864311315923, "grad_norm": 0.2560847945197175, "learning_rate": 4.8431967627718766e-05, "loss": 0.3888, "num_tokens": 215564498.0, "step": 1128 }, { "epoch": 0.3853899982932241, "grad_norm": 0.24558394805799047, "learning_rate": 4.842564491654021e-05, "loss": 0.3738, "num_tokens": 215770926.0, "step": 1129 }, { "epoch": 0.38573135347328896, "grad_norm": 0.2533407483970532, "learning_rate": 4.841932220536166e-05, "loss": 0.3929, "num_tokens": 215973926.0, "step": 1130 }, { "epoch": 0.3860727086533538, "grad_norm": 0.26885007526951515, "learning_rate": 4.841299949418311e-05, "loss": 0.379, "num_tokens": 216158800.0, "step": 1131 }, { "epoch": 0.3864140638334187, "grad_norm": 0.28371902989547026, "learning_rate": 4.8406676783004554e-05, "loss": 0.398, "num_tokens": 216358952.0, "step": 1132 }, { "epoch": 0.38675541901348354, "grad_norm": 0.27192895141143136, "learning_rate": 4.8400354071826e-05, "loss": 0.3946, "num_tokens": 216564120.0, "step": 1133 }, { "epoch": 0.3870967741935484, "grad_norm": 0.27217091449700576, "learning_rate": 4.839403136064745e-05, "loss": 0.3884, "num_tokens": 216756509.0, "step": 1134 }, { "epoch": 0.38743812937361327, "grad_norm": 0.27754187981144274, "learning_rate": 4.8387708649468895e-05, "loss": 0.3935, "num_tokens": 216950848.0, "step": 1135 }, { "epoch": 0.3877794845536781, "grad_norm": 0.2669570757447976, "learning_rate": 4.838138593829034e-05, "loss": 0.3521, "num_tokens": 217118907.0, "step": 1136 }, { "epoch": 0.38812083973374295, "grad_norm": 0.2990067035816908, "learning_rate": 4.837506322711179e-05, "loss": 0.3927, "num_tokens": 217301810.0, "step": 1137 }, { "epoch": 0.3884621949138078, "grad_norm": 0.28823021727976333, "learning_rate": 4.8368740515933236e-05, "loss": 0.3318, "num_tokens": 217462192.0, "step": 1138 }, { "epoch": 0.3888035500938727, "grad_norm": 0.264245305256271, "learning_rate": 4.836241780475468e-05, "loss": 0.3776, "num_tokens": 217675724.0, "step": 1139 }, { "epoch": 0.3891449052739375, "grad_norm": 0.3004154047654002, "learning_rate": 4.835609509357612e-05, "loss": 0.4232, "num_tokens": 217886373.0, "step": 1140 }, { "epoch": 0.38948626045400236, "grad_norm": 0.2698198569185578, "learning_rate": 4.834977238239757e-05, "loss": 0.4032, "num_tokens": 218076175.0, "step": 1141 }, { "epoch": 0.38982761563406726, "grad_norm": 0.2776491382430381, "learning_rate": 4.8343449671219024e-05, "loss": 0.3612, "num_tokens": 218265572.0, "step": 1142 }, { "epoch": 0.3901689708141321, "grad_norm": 0.26697294585631237, "learning_rate": 4.833712696004047e-05, "loss": 0.4072, "num_tokens": 218461042.0, "step": 1143 }, { "epoch": 0.39051032599419694, "grad_norm": 0.24902595412266756, "learning_rate": 4.833080424886192e-05, "loss": 0.3586, "num_tokens": 218686923.0, "step": 1144 }, { "epoch": 0.39085168117426183, "grad_norm": 0.24003734948773994, "learning_rate": 4.832448153768336e-05, "loss": 0.3661, "num_tokens": 218922941.0, "step": 1145 }, { "epoch": 0.3911930363543267, "grad_norm": 0.27716338272735525, "learning_rate": 4.8318158826504805e-05, "loss": 0.3608, "num_tokens": 219077965.0, "step": 1146 }, { "epoch": 0.3915343915343915, "grad_norm": 0.2596155724282114, "learning_rate": 4.831183611532625e-05, "loss": 0.3838, "num_tokens": 219276706.0, "step": 1147 }, { "epoch": 0.3918757467144564, "grad_norm": 0.26726055721203895, "learning_rate": 4.83055134041477e-05, "loss": 0.3809, "num_tokens": 219499086.0, "step": 1148 }, { "epoch": 0.39221710189452125, "grad_norm": 0.2556938210676573, "learning_rate": 4.8299190692969146e-05, "loss": 0.3878, "num_tokens": 219689652.0, "step": 1149 }, { "epoch": 0.3925584570745861, "grad_norm": 0.2955430705325527, "learning_rate": 4.829286798179059e-05, "loss": 0.3933, "num_tokens": 219848598.0, "step": 1150 }, { "epoch": 0.392899812254651, "grad_norm": 0.3427335933299811, "learning_rate": 4.828654527061204e-05, "loss": 0.3692, "num_tokens": 220049698.0, "step": 1151 }, { "epoch": 0.3932411674347158, "grad_norm": 0.2522395648105789, "learning_rate": 4.828022255943349e-05, "loss": 0.369, "num_tokens": 220241156.0, "step": 1152 }, { "epoch": 0.39358252261478066, "grad_norm": 0.26580332161476516, "learning_rate": 4.8273899848254934e-05, "loss": 0.3779, "num_tokens": 220434663.0, "step": 1153 }, { "epoch": 0.39392387779484556, "grad_norm": 0.27030236037071603, "learning_rate": 4.826757713707638e-05, "loss": 0.3832, "num_tokens": 220611393.0, "step": 1154 }, { "epoch": 0.3942652329749104, "grad_norm": 0.2638259928095794, "learning_rate": 4.826125442589783e-05, "loss": 0.4117, "num_tokens": 220832192.0, "step": 1155 }, { "epoch": 0.39460658815497524, "grad_norm": 0.23577065202300365, "learning_rate": 4.8254931714719275e-05, "loss": 0.3655, "num_tokens": 221054373.0, "step": 1156 }, { "epoch": 0.39494794333504013, "grad_norm": 0.2585827480224908, "learning_rate": 4.8248609003540715e-05, "loss": 0.4254, "num_tokens": 221239160.0, "step": 1157 }, { "epoch": 0.39528929851510497, "grad_norm": 0.2595532611790308, "learning_rate": 4.824228629236217e-05, "loss": 0.4189, "num_tokens": 221444627.0, "step": 1158 }, { "epoch": 0.3956306536951698, "grad_norm": 0.2974672452866211, "learning_rate": 4.8235963581183616e-05, "loss": 0.3865, "num_tokens": 221592239.0, "step": 1159 }, { "epoch": 0.3959720088752347, "grad_norm": 0.2464389202320836, "learning_rate": 4.822964087000506e-05, "loss": 0.3539, "num_tokens": 221791371.0, "step": 1160 }, { "epoch": 0.39631336405529954, "grad_norm": 0.2542938681337432, "learning_rate": 4.822331815882651e-05, "loss": 0.3864, "num_tokens": 221995174.0, "step": 1161 }, { "epoch": 0.3966547192353644, "grad_norm": 0.2610495006259011, "learning_rate": 4.821699544764796e-05, "loss": 0.3593, "num_tokens": 222161050.0, "step": 1162 }, { "epoch": 0.3969960744154293, "grad_norm": 0.25965055122049924, "learning_rate": 4.82106727364694e-05, "loss": 0.41, "num_tokens": 222366522.0, "step": 1163 }, { "epoch": 0.3973374295954941, "grad_norm": 0.2453529816707427, "learning_rate": 4.8204350025290844e-05, "loss": 0.3829, "num_tokens": 222571799.0, "step": 1164 }, { "epoch": 0.39767878477555896, "grad_norm": 0.2721747165084082, "learning_rate": 4.819802731411229e-05, "loss": 0.3976, "num_tokens": 222749170.0, "step": 1165 }, { "epoch": 0.39802013995562385, "grad_norm": 0.26686971246072194, "learning_rate": 4.819170460293374e-05, "loss": 0.3586, "num_tokens": 222898050.0, "step": 1166 }, { "epoch": 0.3983614951356887, "grad_norm": 0.2677914157183486, "learning_rate": 4.8185381891755185e-05, "loss": 0.3848, "num_tokens": 223082274.0, "step": 1167 }, { "epoch": 0.39870285031575353, "grad_norm": 0.27692700602746556, "learning_rate": 4.817905918057664e-05, "loss": 0.3571, "num_tokens": 223261333.0, "step": 1168 }, { "epoch": 0.39904420549581837, "grad_norm": 0.2532643123533925, "learning_rate": 4.817273646939808e-05, "loss": 0.3681, "num_tokens": 223441999.0, "step": 1169 }, { "epoch": 0.39938556067588327, "grad_norm": 0.24469537461556795, "learning_rate": 4.8166413758219526e-05, "loss": 0.3297, "num_tokens": 223623114.0, "step": 1170 }, { "epoch": 0.3997269158559481, "grad_norm": 0.28421768950424564, "learning_rate": 4.816009104704097e-05, "loss": 0.41, "num_tokens": 223799080.0, "step": 1171 }, { "epoch": 0.40006827103601295, "grad_norm": 0.26235566402615523, "learning_rate": 4.815376833586242e-05, "loss": 0.4191, "num_tokens": 224014019.0, "step": 1172 }, { "epoch": 0.40040962621607784, "grad_norm": 0.23682393532883475, "learning_rate": 4.814744562468387e-05, "loss": 0.3832, "num_tokens": 224226398.0, "step": 1173 }, { "epoch": 0.4007509813961427, "grad_norm": 0.2770214596409845, "learning_rate": 4.8141122913505314e-05, "loss": 0.377, "num_tokens": 224392129.0, "step": 1174 }, { "epoch": 0.4010923365762075, "grad_norm": 0.25949505661974687, "learning_rate": 4.813480020232676e-05, "loss": 0.366, "num_tokens": 224560313.0, "step": 1175 }, { "epoch": 0.4014336917562724, "grad_norm": 0.24748659758804617, "learning_rate": 4.812847749114821e-05, "loss": 0.3708, "num_tokens": 224755284.0, "step": 1176 }, { "epoch": 0.40177504693633725, "grad_norm": 0.2513332808814199, "learning_rate": 4.8122154779969655e-05, "loss": 0.3646, "num_tokens": 224937796.0, "step": 1177 }, { "epoch": 0.4021164021164021, "grad_norm": 0.24874415335131966, "learning_rate": 4.81158320687911e-05, "loss": 0.3874, "num_tokens": 225164207.0, "step": 1178 }, { "epoch": 0.402457757296467, "grad_norm": 0.22538311367705008, "learning_rate": 4.810950935761255e-05, "loss": 0.3461, "num_tokens": 225347369.0, "step": 1179 }, { "epoch": 0.40279911247653183, "grad_norm": 0.24317507417264173, "learning_rate": 4.8103186646433996e-05, "loss": 0.3977, "num_tokens": 225557931.0, "step": 1180 }, { "epoch": 0.40314046765659667, "grad_norm": 0.2415453675692468, "learning_rate": 4.8096863935255436e-05, "loss": 0.3637, "num_tokens": 225736078.0, "step": 1181 }, { "epoch": 0.40348182283666156, "grad_norm": 0.2853095818779707, "learning_rate": 4.809054122407688e-05, "loss": 0.3748, "num_tokens": 225876878.0, "step": 1182 }, { "epoch": 0.4038231780167264, "grad_norm": 0.25335551352215097, "learning_rate": 4.808421851289833e-05, "loss": 0.3572, "num_tokens": 226059854.0, "step": 1183 }, { "epoch": 0.40416453319679124, "grad_norm": 0.2522969183899323, "learning_rate": 4.807789580171978e-05, "loss": 0.3659, "num_tokens": 226244780.0, "step": 1184 }, { "epoch": 0.40450588837685614, "grad_norm": 0.2377487702530745, "learning_rate": 4.807157309054123e-05, "loss": 0.3897, "num_tokens": 226437732.0, "step": 1185 }, { "epoch": 0.404847243556921, "grad_norm": 0.2565215038768563, "learning_rate": 4.806525037936268e-05, "loss": 0.4293, "num_tokens": 226656948.0, "step": 1186 }, { "epoch": 0.4051885987369858, "grad_norm": 0.24472156627494115, "learning_rate": 4.805892766818412e-05, "loss": 0.3774, "num_tokens": 226867767.0, "step": 1187 }, { "epoch": 0.4055299539170507, "grad_norm": 0.2589004681776695, "learning_rate": 4.8052604957005565e-05, "loss": 0.3896, "num_tokens": 227038598.0, "step": 1188 }, { "epoch": 0.40587130909711555, "grad_norm": 0.23281447249040993, "learning_rate": 4.804628224582701e-05, "loss": 0.3953, "num_tokens": 227268469.0, "step": 1189 }, { "epoch": 0.4062126642771804, "grad_norm": 0.2310293067179763, "learning_rate": 4.803995953464846e-05, "loss": 0.3543, "num_tokens": 227472864.0, "step": 1190 }, { "epoch": 0.4065540194572453, "grad_norm": 0.24178220830601535, "learning_rate": 4.8033636823469906e-05, "loss": 0.3482, "num_tokens": 227651490.0, "step": 1191 }, { "epoch": 0.4068953746373101, "grad_norm": 0.26219118111984197, "learning_rate": 4.802731411229135e-05, "loss": 0.374, "num_tokens": 227814545.0, "step": 1192 }, { "epoch": 0.40723672981737497, "grad_norm": 0.24616815707074058, "learning_rate": 4.80209914011128e-05, "loss": 0.367, "num_tokens": 228043198.0, "step": 1193 }, { "epoch": 0.40757808499743986, "grad_norm": 0.2822888076259996, "learning_rate": 4.801466868993425e-05, "loss": 0.3896, "num_tokens": 228216792.0, "step": 1194 }, { "epoch": 0.4079194401775047, "grad_norm": 0.2504936764030185, "learning_rate": 4.8008345978755694e-05, "loss": 0.4072, "num_tokens": 228448487.0, "step": 1195 }, { "epoch": 0.40826079535756954, "grad_norm": 0.24323865434348008, "learning_rate": 4.800202326757714e-05, "loss": 0.3726, "num_tokens": 228658566.0, "step": 1196 }, { "epoch": 0.40860215053763443, "grad_norm": 0.22666072627561884, "learning_rate": 4.799570055639859e-05, "loss": 0.3918, "num_tokens": 228897177.0, "step": 1197 }, { "epoch": 0.4089435057176993, "grad_norm": 0.26728283249442064, "learning_rate": 4.7989377845220035e-05, "loss": 0.3593, "num_tokens": 229050017.0, "step": 1198 }, { "epoch": 0.4092848608977641, "grad_norm": 0.25756517589025796, "learning_rate": 4.7983055134041475e-05, "loss": 0.3723, "num_tokens": 229220669.0, "step": 1199 }, { "epoch": 0.409626216077829, "grad_norm": 0.22956107933170833, "learning_rate": 4.797673242286292e-05, "loss": 0.3784, "num_tokens": 229432556.0, "step": 1200 }, { "epoch": 0.40996757125789385, "grad_norm": 0.2596845800889682, "learning_rate": 4.797040971168437e-05, "loss": 0.3626, "num_tokens": 229612019.0, "step": 1201 }, { "epoch": 0.4103089264379587, "grad_norm": 0.229146613994493, "learning_rate": 4.796408700050582e-05, "loss": 0.3642, "num_tokens": 229814140.0, "step": 1202 }, { "epoch": 0.4106502816180235, "grad_norm": 0.23952926493469856, "learning_rate": 4.795776428932727e-05, "loss": 0.3683, "num_tokens": 230026254.0, "step": 1203 }, { "epoch": 0.4109916367980884, "grad_norm": 0.27018322428158903, "learning_rate": 4.7951441578148716e-05, "loss": 0.3782, "num_tokens": 230232986.0, "step": 1204 }, { "epoch": 0.41133299197815326, "grad_norm": 0.2555970462482741, "learning_rate": 4.794511886697016e-05, "loss": 0.4033, "num_tokens": 230420022.0, "step": 1205 }, { "epoch": 0.4116743471582181, "grad_norm": 0.29643715974249313, "learning_rate": 4.7938796155791604e-05, "loss": 0.3732, "num_tokens": 230590610.0, "step": 1206 }, { "epoch": 0.412015702338283, "grad_norm": 0.2698568311400083, "learning_rate": 4.793247344461305e-05, "loss": 0.4173, "num_tokens": 230792939.0, "step": 1207 }, { "epoch": 0.41235705751834784, "grad_norm": 0.2546972705676788, "learning_rate": 4.79261507334345e-05, "loss": 0.3818, "num_tokens": 230959810.0, "step": 1208 }, { "epoch": 0.4126984126984127, "grad_norm": 0.26282134352572367, "learning_rate": 4.7919828022255945e-05, "loss": 0.4053, "num_tokens": 231174664.0, "step": 1209 }, { "epoch": 0.41303976787847757, "grad_norm": 0.2645543639230189, "learning_rate": 4.791350531107739e-05, "loss": 0.3609, "num_tokens": 231396149.0, "step": 1210 }, { "epoch": 0.4133811230585424, "grad_norm": 0.29592138811129576, "learning_rate": 4.790718259989884e-05, "loss": 0.4201, "num_tokens": 231568043.0, "step": 1211 }, { "epoch": 0.41372247823860725, "grad_norm": 0.23575635258912134, "learning_rate": 4.7900859888720286e-05, "loss": 0.363, "num_tokens": 231744701.0, "step": 1212 }, { "epoch": 0.41406383341867214, "grad_norm": 0.24013912651679403, "learning_rate": 4.789453717754173e-05, "loss": 0.4091, "num_tokens": 231979866.0, "step": 1213 }, { "epoch": 0.414405188598737, "grad_norm": 0.2420050491456371, "learning_rate": 4.788821446636318e-05, "loss": 0.3755, "num_tokens": 232183026.0, "step": 1214 }, { "epoch": 0.4147465437788018, "grad_norm": 0.25058383469899204, "learning_rate": 4.7881891755184627e-05, "loss": 0.3718, "num_tokens": 232377181.0, "step": 1215 }, { "epoch": 0.4150878989588667, "grad_norm": 0.265415627625761, "learning_rate": 4.7875569044006074e-05, "loss": 0.4062, "num_tokens": 232541627.0, "step": 1216 }, { "epoch": 0.41542925413893156, "grad_norm": 0.24773265954414828, "learning_rate": 4.7869246332827514e-05, "loss": 0.3878, "num_tokens": 232747356.0, "step": 1217 }, { "epoch": 0.4157706093189964, "grad_norm": 0.2812306478509173, "learning_rate": 4.786292362164896e-05, "loss": 0.4339, "num_tokens": 232946510.0, "step": 1218 }, { "epoch": 0.4161119644990613, "grad_norm": 0.26168091545808986, "learning_rate": 4.7856600910470414e-05, "loss": 0.3897, "num_tokens": 233128343.0, "step": 1219 }, { "epoch": 0.41645331967912613, "grad_norm": 0.25021849769451393, "learning_rate": 4.785027819929186e-05, "loss": 0.3809, "num_tokens": 233332229.0, "step": 1220 }, { "epoch": 0.416794674859191, "grad_norm": 0.2404383432369618, "learning_rate": 4.784395548811331e-05, "loss": 0.3505, "num_tokens": 233522049.0, "step": 1221 }, { "epoch": 0.41713603003925587, "grad_norm": 0.25493454293042167, "learning_rate": 4.7837632776934755e-05, "loss": 0.3747, "num_tokens": 233712420.0, "step": 1222 }, { "epoch": 0.4174773852193207, "grad_norm": 0.2479461086596157, "learning_rate": 4.7831310065756196e-05, "loss": 0.3957, "num_tokens": 233895654.0, "step": 1223 }, { "epoch": 0.41781874039938555, "grad_norm": 0.24446599218694032, "learning_rate": 4.782498735457764e-05, "loss": 0.3652, "num_tokens": 234088035.0, "step": 1224 }, { "epoch": 0.41816009557945044, "grad_norm": 0.3093557626982965, "learning_rate": 4.781866464339909e-05, "loss": 0.4034, "num_tokens": 234272560.0, "step": 1225 }, { "epoch": 0.4185014507595153, "grad_norm": 0.2489760409083324, "learning_rate": 4.7812341932220537e-05, "loss": 0.3843, "num_tokens": 234464921.0, "step": 1226 }, { "epoch": 0.4188428059395801, "grad_norm": 0.2857392425424046, "learning_rate": 4.7806019221041984e-05, "loss": 0.4018, "num_tokens": 234646742.0, "step": 1227 }, { "epoch": 0.419184161119645, "grad_norm": 0.27643676728854766, "learning_rate": 4.779969650986344e-05, "loss": 0.3955, "num_tokens": 234829704.0, "step": 1228 }, { "epoch": 0.41952551629970986, "grad_norm": 0.25306510219282297, "learning_rate": 4.779337379868488e-05, "loss": 0.3726, "num_tokens": 235016915.0, "step": 1229 }, { "epoch": 0.4198668714797747, "grad_norm": 0.2636191503993529, "learning_rate": 4.7787051087506324e-05, "loss": 0.377, "num_tokens": 235176478.0, "step": 1230 }, { "epoch": 0.4202082266598396, "grad_norm": 0.26240630697860123, "learning_rate": 4.778072837632777e-05, "loss": 0.3673, "num_tokens": 235355561.0, "step": 1231 }, { "epoch": 0.42054958183990443, "grad_norm": 0.2678327287036716, "learning_rate": 4.777440566514922e-05, "loss": 0.366, "num_tokens": 235544530.0, "step": 1232 }, { "epoch": 0.42089093701996927, "grad_norm": 0.26162365320635245, "learning_rate": 4.7768082953970665e-05, "loss": 0.423, "num_tokens": 235756103.0, "step": 1233 }, { "epoch": 0.4212322922000341, "grad_norm": 0.2705162911230427, "learning_rate": 4.776176024279211e-05, "loss": 0.3692, "num_tokens": 235960406.0, "step": 1234 }, { "epoch": 0.421573647380099, "grad_norm": 0.2807898089949096, "learning_rate": 4.775543753161356e-05, "loss": 0.42, "num_tokens": 236146875.0, "step": 1235 }, { "epoch": 0.42191500256016384, "grad_norm": 0.21597060494155046, "learning_rate": 4.7749114820435006e-05, "loss": 0.3536, "num_tokens": 236369934.0, "step": 1236 }, { "epoch": 0.4222563577402287, "grad_norm": 0.25794764196871756, "learning_rate": 4.774279210925645e-05, "loss": 0.391, "num_tokens": 236573407.0, "step": 1237 }, { "epoch": 0.4225977129202936, "grad_norm": 0.2554482515583962, "learning_rate": 4.77364693980779e-05, "loss": 0.3868, "num_tokens": 236745846.0, "step": 1238 }, { "epoch": 0.4229390681003584, "grad_norm": 0.25066853918501353, "learning_rate": 4.773014668689935e-05, "loss": 0.3822, "num_tokens": 236920786.0, "step": 1239 }, { "epoch": 0.42328042328042326, "grad_norm": 0.2674701121333015, "learning_rate": 4.7723823975720794e-05, "loss": 0.4064, "num_tokens": 237101493.0, "step": 1240 }, { "epoch": 0.42362177846048815, "grad_norm": 0.2945243222664658, "learning_rate": 4.7717501264542235e-05, "loss": 0.3624, "num_tokens": 237277154.0, "step": 1241 }, { "epoch": 0.423963133640553, "grad_norm": 0.24028898238120594, "learning_rate": 4.771117855336368e-05, "loss": 0.3585, "num_tokens": 237460013.0, "step": 1242 }, { "epoch": 0.42430448882061783, "grad_norm": 0.31471965541696706, "learning_rate": 4.770485584218513e-05, "loss": 0.4252, "num_tokens": 237701396.0, "step": 1243 }, { "epoch": 0.4246458440006827, "grad_norm": 0.2376414888511679, "learning_rate": 4.7698533131006575e-05, "loss": 0.3485, "num_tokens": 237879761.0, "step": 1244 }, { "epoch": 0.42498719918074757, "grad_norm": 0.26998512957422827, "learning_rate": 4.769221041982803e-05, "loss": 0.3735, "num_tokens": 238047002.0, "step": 1245 }, { "epoch": 0.4253285543608124, "grad_norm": 0.26446992287514104, "learning_rate": 4.7685887708649476e-05, "loss": 0.3906, "num_tokens": 238251980.0, "step": 1246 }, { "epoch": 0.4256699095408773, "grad_norm": 0.3158927458466595, "learning_rate": 4.7679564997470916e-05, "loss": 0.4008, "num_tokens": 238437126.0, "step": 1247 }, { "epoch": 0.42601126472094214, "grad_norm": 0.26032218403656004, "learning_rate": 4.7673242286292363e-05, "loss": 0.4016, "num_tokens": 238625600.0, "step": 1248 }, { "epoch": 0.426352619901007, "grad_norm": 0.2545484965471973, "learning_rate": 4.766691957511381e-05, "loss": 0.3854, "num_tokens": 238831898.0, "step": 1249 }, { "epoch": 0.4266939750810719, "grad_norm": 0.2722144450963397, "learning_rate": 4.766059686393526e-05, "loss": 0.3727, "num_tokens": 238975208.0, "step": 1250 }, { "epoch": 0.4270353302611367, "grad_norm": 0.24992105153891953, "learning_rate": 4.7654274152756704e-05, "loss": 0.3722, "num_tokens": 239169755.0, "step": 1251 }, { "epoch": 0.42737668544120155, "grad_norm": 0.2572111742739011, "learning_rate": 4.764795144157815e-05, "loss": 0.3794, "num_tokens": 239366599.0, "step": 1252 }, { "epoch": 0.42771804062126645, "grad_norm": 0.2481649996554703, "learning_rate": 4.76416287303996e-05, "loss": 0.387, "num_tokens": 239576501.0, "step": 1253 }, { "epoch": 0.4280593958013313, "grad_norm": 0.28293426527471605, "learning_rate": 4.7635306019221045e-05, "loss": 0.4021, "num_tokens": 239751646.0, "step": 1254 }, { "epoch": 0.42840075098139613, "grad_norm": 0.25088364165174004, "learning_rate": 4.762898330804249e-05, "loss": 0.3669, "num_tokens": 239929123.0, "step": 1255 }, { "epoch": 0.428742106161461, "grad_norm": 0.24644250592857955, "learning_rate": 4.762266059686394e-05, "loss": 0.3776, "num_tokens": 240115800.0, "step": 1256 }, { "epoch": 0.42908346134152586, "grad_norm": 0.27535054965486805, "learning_rate": 4.7616337885685386e-05, "loss": 0.4162, "num_tokens": 240293040.0, "step": 1257 }, { "epoch": 0.4294248165215907, "grad_norm": 0.25196135858701296, "learning_rate": 4.761001517450683e-05, "loss": 0.3677, "num_tokens": 240466120.0, "step": 1258 }, { "epoch": 0.4297661717016556, "grad_norm": 0.25573433470576556, "learning_rate": 4.7603692463328273e-05, "loss": 0.3806, "num_tokens": 240648840.0, "step": 1259 }, { "epoch": 0.43010752688172044, "grad_norm": 0.2719678851444447, "learning_rate": 4.759736975214972e-05, "loss": 0.3927, "num_tokens": 240816514.0, "step": 1260 }, { "epoch": 0.4304488820617853, "grad_norm": 0.2686485936349439, "learning_rate": 4.759104704097117e-05, "loss": 0.4063, "num_tokens": 241008548.0, "step": 1261 }, { "epoch": 0.43079023724185017, "grad_norm": 0.27001967237611824, "learning_rate": 4.758472432979262e-05, "loss": 0.3639, "num_tokens": 241175597.0, "step": 1262 }, { "epoch": 0.431131592421915, "grad_norm": 0.24513647835717495, "learning_rate": 4.757840161861407e-05, "loss": 0.383, "num_tokens": 241380177.0, "step": 1263 }, { "epoch": 0.43147294760197985, "grad_norm": 0.32558623007096665, "learning_rate": 4.7572078907435515e-05, "loss": 0.4345, "num_tokens": 241563581.0, "step": 1264 }, { "epoch": 0.4318143027820447, "grad_norm": 0.29380154447885176, "learning_rate": 4.7565756196256955e-05, "loss": 0.3756, "num_tokens": 241732862.0, "step": 1265 }, { "epoch": 0.4321556579621096, "grad_norm": 0.26434351733107037, "learning_rate": 4.75594334850784e-05, "loss": 0.3909, "num_tokens": 241938152.0, "step": 1266 }, { "epoch": 0.4324970131421744, "grad_norm": 0.2740782011655568, "learning_rate": 4.755311077389985e-05, "loss": 0.4021, "num_tokens": 242132279.0, "step": 1267 }, { "epoch": 0.43283836832223926, "grad_norm": 0.2846203787922339, "learning_rate": 4.7546788062721296e-05, "loss": 0.416, "num_tokens": 242314952.0, "step": 1268 }, { "epoch": 0.43317972350230416, "grad_norm": 0.24375650777351762, "learning_rate": 4.754046535154274e-05, "loss": 0.3602, "num_tokens": 242517887.0, "step": 1269 }, { "epoch": 0.433521078682369, "grad_norm": 0.2529921554616735, "learning_rate": 4.753414264036419e-05, "loss": 0.3616, "num_tokens": 242690025.0, "step": 1270 }, { "epoch": 0.43386243386243384, "grad_norm": 0.2509843653681884, "learning_rate": 4.752781992918564e-05, "loss": 0.4219, "num_tokens": 242908858.0, "step": 1271 }, { "epoch": 0.43420378904249873, "grad_norm": 0.24397746062078454, "learning_rate": 4.7521497218007084e-05, "loss": 0.3917, "num_tokens": 243120613.0, "step": 1272 }, { "epoch": 0.4345451442225636, "grad_norm": 0.2736078079123328, "learning_rate": 4.751517450682853e-05, "loss": 0.39, "num_tokens": 243303355.0, "step": 1273 }, { "epoch": 0.4348864994026284, "grad_norm": 0.2460072076917452, "learning_rate": 4.750885179564998e-05, "loss": 0.3811, "num_tokens": 243491087.0, "step": 1274 }, { "epoch": 0.4352278545826933, "grad_norm": 0.2488892880980474, "learning_rate": 4.7502529084471425e-05, "loss": 0.3699, "num_tokens": 243713091.0, "step": 1275 }, { "epoch": 0.43556920976275815, "grad_norm": 0.24809568774493604, "learning_rate": 4.7496206373292865e-05, "loss": 0.3756, "num_tokens": 243892499.0, "step": 1276 }, { "epoch": 0.435910564942823, "grad_norm": 0.2479773095327674, "learning_rate": 4.748988366211431e-05, "loss": 0.364, "num_tokens": 244056601.0, "step": 1277 }, { "epoch": 0.4362519201228879, "grad_norm": 0.2495093243950158, "learning_rate": 4.748356095093576e-05, "loss": 0.3623, "num_tokens": 244266172.0, "step": 1278 }, { "epoch": 0.4365932753029527, "grad_norm": 0.2550828228011225, "learning_rate": 4.747723823975721e-05, "loss": 0.3644, "num_tokens": 244468018.0, "step": 1279 }, { "epoch": 0.43693463048301756, "grad_norm": 0.2613764094030855, "learning_rate": 4.747091552857866e-05, "loss": 0.3409, "num_tokens": 244666725.0, "step": 1280 }, { "epoch": 0.43727598566308246, "grad_norm": 0.2461162811892664, "learning_rate": 4.746459281740011e-05, "loss": 0.3996, "num_tokens": 244918595.0, "step": 1281 }, { "epoch": 0.4376173408431473, "grad_norm": 0.3001004070302783, "learning_rate": 4.745827010622155e-05, "loss": 0.4056, "num_tokens": 245098598.0, "step": 1282 }, { "epoch": 0.43795869602321214, "grad_norm": 0.25524150546558716, "learning_rate": 4.7451947395042994e-05, "loss": 0.3664, "num_tokens": 245335431.0, "step": 1283 }, { "epoch": 0.43830005120327703, "grad_norm": 0.261530003980693, "learning_rate": 4.744562468386444e-05, "loss": 0.4106, "num_tokens": 245523923.0, "step": 1284 }, { "epoch": 0.43864140638334187, "grad_norm": 0.23613557435835586, "learning_rate": 4.743930197268589e-05, "loss": 0.3589, "num_tokens": 245738188.0, "step": 1285 }, { "epoch": 0.4389827615634067, "grad_norm": 0.2826001037655982, "learning_rate": 4.7432979261507335e-05, "loss": 0.3817, "num_tokens": 245900590.0, "step": 1286 }, { "epoch": 0.4393241167434716, "grad_norm": 0.27161913934967924, "learning_rate": 4.742665655032878e-05, "loss": 0.4097, "num_tokens": 246096536.0, "step": 1287 }, { "epoch": 0.43966547192353644, "grad_norm": 0.2757044772398571, "learning_rate": 4.742033383915023e-05, "loss": 0.4454, "num_tokens": 246317652.0, "step": 1288 }, { "epoch": 0.4400068271036013, "grad_norm": 0.28622328832933225, "learning_rate": 4.7414011127971676e-05, "loss": 0.3848, "num_tokens": 246480709.0, "step": 1289 }, { "epoch": 0.4403481822836662, "grad_norm": 0.26520076182803337, "learning_rate": 4.740768841679312e-05, "loss": 0.3911, "num_tokens": 246676283.0, "step": 1290 }, { "epoch": 0.440689537463731, "grad_norm": 0.260734568366426, "learning_rate": 4.740136570561457e-05, "loss": 0.3686, "num_tokens": 246871822.0, "step": 1291 }, { "epoch": 0.44103089264379586, "grad_norm": 0.24657404466053603, "learning_rate": 4.739504299443602e-05, "loss": 0.3852, "num_tokens": 247062141.0, "step": 1292 }, { "epoch": 0.44137224782386075, "grad_norm": 0.2507165840605578, "learning_rate": 4.7388720283257464e-05, "loss": 0.3627, "num_tokens": 247235069.0, "step": 1293 }, { "epoch": 0.4417136030039256, "grad_norm": 0.24962851358455918, "learning_rate": 4.7382397572078904e-05, "loss": 0.3826, "num_tokens": 247431022.0, "step": 1294 }, { "epoch": 0.44205495818399043, "grad_norm": 0.27549676740300544, "learning_rate": 4.737607486090035e-05, "loss": 0.3568, "num_tokens": 247568722.0, "step": 1295 }, { "epoch": 0.4423963133640553, "grad_norm": 0.27288450768880446, "learning_rate": 4.7369752149721805e-05, "loss": 0.3699, "num_tokens": 247730618.0, "step": 1296 }, { "epoch": 0.44273766854412017, "grad_norm": 0.25134410450331063, "learning_rate": 4.736342943854325e-05, "loss": 0.4037, "num_tokens": 247922937.0, "step": 1297 }, { "epoch": 0.443079023724185, "grad_norm": 0.23817117743324281, "learning_rate": 4.73571067273647e-05, "loss": 0.3369, "num_tokens": 248100664.0, "step": 1298 }, { "epoch": 0.44342037890424985, "grad_norm": 0.289226970988559, "learning_rate": 4.7350784016186146e-05, "loss": 0.3729, "num_tokens": 248260227.0, "step": 1299 }, { "epoch": 0.44376173408431474, "grad_norm": 0.28526412632371756, "learning_rate": 4.7344461305007586e-05, "loss": 0.4036, "num_tokens": 248443631.0, "step": 1300 }, { "epoch": 0.4441030892643796, "grad_norm": 0.2402155757515521, "learning_rate": 4.733813859382903e-05, "loss": 0.3601, "num_tokens": 248648859.0, "step": 1301 }, { "epoch": 0.4444444444444444, "grad_norm": 0.2605065801355406, "learning_rate": 4.733181588265048e-05, "loss": 0.3845, "num_tokens": 248841840.0, "step": 1302 }, { "epoch": 0.4447857996245093, "grad_norm": 0.27258432254567544, "learning_rate": 4.732549317147193e-05, "loss": 0.3845, "num_tokens": 249050152.0, "step": 1303 }, { "epoch": 0.44512715480457415, "grad_norm": 0.2683818598465306, "learning_rate": 4.7319170460293374e-05, "loss": 0.4096, "num_tokens": 249229669.0, "step": 1304 }, { "epoch": 0.445468509984639, "grad_norm": 0.27627220686854115, "learning_rate": 4.731284774911483e-05, "loss": 0.3833, "num_tokens": 249385138.0, "step": 1305 }, { "epoch": 0.4458098651647039, "grad_norm": 0.2747198311349078, "learning_rate": 4.730652503793627e-05, "loss": 0.4025, "num_tokens": 249569652.0, "step": 1306 }, { "epoch": 0.44615122034476873, "grad_norm": 0.29690208661671197, "learning_rate": 4.7300202326757715e-05, "loss": 0.4071, "num_tokens": 249762835.0, "step": 1307 }, { "epoch": 0.44649257552483357, "grad_norm": 0.23286759311114202, "learning_rate": 4.729387961557916e-05, "loss": 0.3541, "num_tokens": 249978296.0, "step": 1308 }, { "epoch": 0.44683393070489846, "grad_norm": 0.24407553001814508, "learning_rate": 4.728755690440061e-05, "loss": 0.3713, "num_tokens": 250166242.0, "step": 1309 }, { "epoch": 0.4471752858849633, "grad_norm": 0.2766598172595908, "learning_rate": 4.7281234193222056e-05, "loss": 0.394, "num_tokens": 250359227.0, "step": 1310 }, { "epoch": 0.44751664106502814, "grad_norm": 0.2508587696179088, "learning_rate": 4.72749114820435e-05, "loss": 0.3565, "num_tokens": 250539731.0, "step": 1311 }, { "epoch": 0.44785799624509304, "grad_norm": 0.2520275641951093, "learning_rate": 4.726858877086495e-05, "loss": 0.3864, "num_tokens": 250712461.0, "step": 1312 }, { "epoch": 0.4481993514251579, "grad_norm": 0.2889247355362315, "learning_rate": 4.72622660596864e-05, "loss": 0.3852, "num_tokens": 250866299.0, "step": 1313 }, { "epoch": 0.4485407066052227, "grad_norm": 0.26069053797248454, "learning_rate": 4.7255943348507844e-05, "loss": 0.4148, "num_tokens": 251101798.0, "step": 1314 }, { "epoch": 0.4488820617852876, "grad_norm": 0.26347375080518326, "learning_rate": 4.724962063732929e-05, "loss": 0.36, "num_tokens": 251277624.0, "step": 1315 }, { "epoch": 0.44922341696535245, "grad_norm": 0.23635490430449144, "learning_rate": 4.724329792615074e-05, "loss": 0.3903, "num_tokens": 251498105.0, "step": 1316 }, { "epoch": 0.4495647721454173, "grad_norm": 0.24902214364411657, "learning_rate": 4.7236975214972185e-05, "loss": 0.3643, "num_tokens": 251677481.0, "step": 1317 }, { "epoch": 0.4499061273254822, "grad_norm": 0.2375048326421164, "learning_rate": 4.7230652503793625e-05, "loss": 0.3385, "num_tokens": 251873573.0, "step": 1318 }, { "epoch": 0.450247482505547, "grad_norm": 0.2407497716379536, "learning_rate": 4.722432979261507e-05, "loss": 0.3541, "num_tokens": 252092198.0, "step": 1319 }, { "epoch": 0.45058883768561186, "grad_norm": 0.23333396495345682, "learning_rate": 4.721800708143652e-05, "loss": 0.3799, "num_tokens": 252293533.0, "step": 1320 }, { "epoch": 0.45093019286567676, "grad_norm": 0.2761972132818852, "learning_rate": 4.7211684370257966e-05, "loss": 0.3879, "num_tokens": 252497608.0, "step": 1321 }, { "epoch": 0.4512715480457416, "grad_norm": 0.24348923417540363, "learning_rate": 4.720536165907942e-05, "loss": 0.3793, "num_tokens": 252702572.0, "step": 1322 }, { "epoch": 0.45161290322580644, "grad_norm": 0.26746617045577126, "learning_rate": 4.719903894790087e-05, "loss": 0.3919, "num_tokens": 252874554.0, "step": 1323 }, { "epoch": 0.45195425840587133, "grad_norm": 0.25409211247627156, "learning_rate": 4.719271623672231e-05, "loss": 0.4219, "num_tokens": 253097099.0, "step": 1324 }, { "epoch": 0.4522956135859362, "grad_norm": 0.26244062094499865, "learning_rate": 4.7186393525543754e-05, "loss": 0.3942, "num_tokens": 253261343.0, "step": 1325 }, { "epoch": 0.452636968766001, "grad_norm": 0.2615527160068468, "learning_rate": 4.71800708143652e-05, "loss": 0.3813, "num_tokens": 253427250.0, "step": 1326 }, { "epoch": 0.4529783239460659, "grad_norm": 0.2564682848196816, "learning_rate": 4.717374810318665e-05, "loss": 0.3682, "num_tokens": 253593245.0, "step": 1327 }, { "epoch": 0.45331967912613075, "grad_norm": 0.2367669735636446, "learning_rate": 4.7167425392008095e-05, "loss": 0.3547, "num_tokens": 253772935.0, "step": 1328 }, { "epoch": 0.4536610343061956, "grad_norm": 0.23132042080701817, "learning_rate": 4.716110268082954e-05, "loss": 0.3673, "num_tokens": 253995816.0, "step": 1329 }, { "epoch": 0.4540023894862604, "grad_norm": 0.27281116506781006, "learning_rate": 4.715477996965099e-05, "loss": 0.3945, "num_tokens": 254178266.0, "step": 1330 }, { "epoch": 0.4543437446663253, "grad_norm": 0.28100630370665586, "learning_rate": 4.7148457258472436e-05, "loss": 0.4, "num_tokens": 254338596.0, "step": 1331 }, { "epoch": 0.45468509984639016, "grad_norm": 0.26059733989149686, "learning_rate": 4.714213454729388e-05, "loss": 0.3741, "num_tokens": 254496231.0, "step": 1332 }, { "epoch": 0.455026455026455, "grad_norm": 0.25571869191794333, "learning_rate": 4.713581183611533e-05, "loss": 0.3943, "num_tokens": 254758212.0, "step": 1333 }, { "epoch": 0.4553678102065199, "grad_norm": 0.26001729344091995, "learning_rate": 4.712948912493678e-05, "loss": 0.3847, "num_tokens": 254972385.0, "step": 1334 }, { "epoch": 0.45570916538658474, "grad_norm": 0.28122212924464995, "learning_rate": 4.7123166413758224e-05, "loss": 0.3624, "num_tokens": 255117584.0, "step": 1335 }, { "epoch": 0.4560505205666496, "grad_norm": 0.232586478294816, "learning_rate": 4.7116843702579664e-05, "loss": 0.3659, "num_tokens": 255310927.0, "step": 1336 }, { "epoch": 0.45639187574671447, "grad_norm": 0.25320470071627355, "learning_rate": 4.711052099140111e-05, "loss": 0.4031, "num_tokens": 255530201.0, "step": 1337 }, { "epoch": 0.4567332309267793, "grad_norm": 0.24707444823488672, "learning_rate": 4.710419828022256e-05, "loss": 0.3813, "num_tokens": 255734907.0, "step": 1338 }, { "epoch": 0.45707458610684415, "grad_norm": 0.25950076809769823, "learning_rate": 4.709787556904401e-05, "loss": 0.3666, "num_tokens": 255896740.0, "step": 1339 }, { "epoch": 0.45741594128690904, "grad_norm": 0.252862255075806, "learning_rate": 4.709155285786546e-05, "loss": 0.3997, "num_tokens": 256100456.0, "step": 1340 }, { "epoch": 0.4577572964669739, "grad_norm": 0.2536667771176279, "learning_rate": 4.7085230146686906e-05, "loss": 0.3878, "num_tokens": 256293425.0, "step": 1341 }, { "epoch": 0.4580986516470387, "grad_norm": 0.24667351389813638, "learning_rate": 4.7078907435508346e-05, "loss": 0.3689, "num_tokens": 256491123.0, "step": 1342 }, { "epoch": 0.4584400068271036, "grad_norm": 0.2386748583859867, "learning_rate": 4.707258472432979e-05, "loss": 0.3521, "num_tokens": 256669355.0, "step": 1343 }, { "epoch": 0.45878136200716846, "grad_norm": 0.2624266276788351, "learning_rate": 4.706626201315124e-05, "loss": 0.3768, "num_tokens": 256853731.0, "step": 1344 }, { "epoch": 0.4591227171872333, "grad_norm": 0.2483056847906158, "learning_rate": 4.705993930197269e-05, "loss": 0.3755, "num_tokens": 257020793.0, "step": 1345 }, { "epoch": 0.4594640723672982, "grad_norm": 0.23488759495012834, "learning_rate": 4.7053616590794134e-05, "loss": 0.3509, "num_tokens": 257247853.0, "step": 1346 }, { "epoch": 0.45980542754736303, "grad_norm": 0.2418511555301585, "learning_rate": 4.704729387961558e-05, "loss": 0.3919, "num_tokens": 257466040.0, "step": 1347 }, { "epoch": 0.46014678272742787, "grad_norm": 0.2518230949953767, "learning_rate": 4.704097116843703e-05, "loss": 0.3953, "num_tokens": 257664694.0, "step": 1348 }, { "epoch": 0.46048813790749277, "grad_norm": 0.24347820559041275, "learning_rate": 4.7034648457258475e-05, "loss": 0.3753, "num_tokens": 257885102.0, "step": 1349 }, { "epoch": 0.4608294930875576, "grad_norm": 0.25044578415107044, "learning_rate": 4.702832574607992e-05, "loss": 0.3556, "num_tokens": 258077697.0, "step": 1350 }, { "epoch": 0.46117084826762245, "grad_norm": 0.22353596047142116, "learning_rate": 4.702200303490137e-05, "loss": 0.3456, "num_tokens": 258258460.0, "step": 1351 }, { "epoch": 0.46151220344768734, "grad_norm": 0.28332306298119586, "learning_rate": 4.7015680323722816e-05, "loss": 0.4048, "num_tokens": 258420441.0, "step": 1352 }, { "epoch": 0.4618535586277522, "grad_norm": 0.2947462816119517, "learning_rate": 4.700935761254426e-05, "loss": 0.3775, "num_tokens": 258643815.0, "step": 1353 }, { "epoch": 0.462194913807817, "grad_norm": 0.24471818475577106, "learning_rate": 4.70030349013657e-05, "loss": 0.3862, "num_tokens": 258869337.0, "step": 1354 }, { "epoch": 0.4625362689878819, "grad_norm": 0.284550197536348, "learning_rate": 4.699671219018715e-05, "loss": 0.3815, "num_tokens": 259055068.0, "step": 1355 }, { "epoch": 0.46287762416794676, "grad_norm": 0.264793622205313, "learning_rate": 4.6990389479008604e-05, "loss": 0.3874, "num_tokens": 259280520.0, "step": 1356 }, { "epoch": 0.4632189793480116, "grad_norm": 0.4300781127797947, "learning_rate": 4.698406676783005e-05, "loss": 0.3828, "num_tokens": 259460241.0, "step": 1357 }, { "epoch": 0.4635603345280765, "grad_norm": 0.24783973891832523, "learning_rate": 4.69777440566515e-05, "loss": 0.3876, "num_tokens": 259683768.0, "step": 1358 }, { "epoch": 0.46390168970814133, "grad_norm": 0.23698286807878502, "learning_rate": 4.6971421345472945e-05, "loss": 0.4178, "num_tokens": 259919187.0, "step": 1359 }, { "epoch": 0.46424304488820617, "grad_norm": 0.23888169227473727, "learning_rate": 4.6965098634294385e-05, "loss": 0.3724, "num_tokens": 260131351.0, "step": 1360 }, { "epoch": 0.464584400068271, "grad_norm": 0.2667162266923999, "learning_rate": 4.695877592311583e-05, "loss": 0.3915, "num_tokens": 260294329.0, "step": 1361 }, { "epoch": 0.4649257552483359, "grad_norm": 0.24317796987607695, "learning_rate": 4.695245321193728e-05, "loss": 0.3447, "num_tokens": 260461407.0, "step": 1362 }, { "epoch": 0.46526711042840074, "grad_norm": 0.24391003536894187, "learning_rate": 4.6946130500758726e-05, "loss": 0.3824, "num_tokens": 260659607.0, "step": 1363 }, { "epoch": 0.4656084656084656, "grad_norm": 0.26370553064745056, "learning_rate": 4.693980778958017e-05, "loss": 0.3735, "num_tokens": 260858400.0, "step": 1364 }, { "epoch": 0.4659498207885305, "grad_norm": 0.2375834806160306, "learning_rate": 4.6933485078401627e-05, "loss": 0.3638, "num_tokens": 261073843.0, "step": 1365 }, { "epoch": 0.4662911759685953, "grad_norm": 0.2589339732233273, "learning_rate": 4.692716236722307e-05, "loss": 0.3985, "num_tokens": 261248903.0, "step": 1366 }, { "epoch": 0.46663253114866016, "grad_norm": 0.25644221950390617, "learning_rate": 4.6920839656044514e-05, "loss": 0.3727, "num_tokens": 261416923.0, "step": 1367 }, { "epoch": 0.46697388632872505, "grad_norm": 0.2285651904915383, "learning_rate": 4.691451694486596e-05, "loss": 0.3657, "num_tokens": 261591930.0, "step": 1368 }, { "epoch": 0.4673152415087899, "grad_norm": 0.22771137562425825, "learning_rate": 4.690819423368741e-05, "loss": 0.3858, "num_tokens": 261823737.0, "step": 1369 }, { "epoch": 0.46765659668885473, "grad_norm": 0.22529934228350867, "learning_rate": 4.6901871522508855e-05, "loss": 0.3697, "num_tokens": 262055817.0, "step": 1370 }, { "epoch": 0.4679979518689196, "grad_norm": 0.27488752793198096, "learning_rate": 4.68955488113303e-05, "loss": 0.3759, "num_tokens": 262199573.0, "step": 1371 }, { "epoch": 0.46833930704898447, "grad_norm": 0.24382646800996033, "learning_rate": 4.688922610015174e-05, "loss": 0.333, "num_tokens": 262342171.0, "step": 1372 }, { "epoch": 0.4686806622290493, "grad_norm": 0.23466959521026828, "learning_rate": 4.6882903388973196e-05, "loss": 0.3769, "num_tokens": 262550098.0, "step": 1373 }, { "epoch": 0.4690220174091142, "grad_norm": 0.2623454722186805, "learning_rate": 4.687658067779464e-05, "loss": 0.3912, "num_tokens": 262744000.0, "step": 1374 }, { "epoch": 0.46936337258917904, "grad_norm": 0.2335637622471908, "learning_rate": 4.687025796661609e-05, "loss": 0.3592, "num_tokens": 262935371.0, "step": 1375 }, { "epoch": 0.4697047277692439, "grad_norm": 0.24457685066797505, "learning_rate": 4.6863935255437537e-05, "loss": 0.3566, "num_tokens": 263133416.0, "step": 1376 }, { "epoch": 0.4700460829493088, "grad_norm": 0.30598738565218636, "learning_rate": 4.6857612544258984e-05, "loss": 0.3675, "num_tokens": 263303943.0, "step": 1377 }, { "epoch": 0.4703874381293736, "grad_norm": 0.24208687691097244, "learning_rate": 4.6851289833080424e-05, "loss": 0.4114, "num_tokens": 263525620.0, "step": 1378 }, { "epoch": 0.47072879330943845, "grad_norm": 0.24040932681752877, "learning_rate": 4.684496712190187e-05, "loss": 0.4077, "num_tokens": 263740192.0, "step": 1379 }, { "epoch": 0.47107014848950335, "grad_norm": 0.2342719870039781, "learning_rate": 4.683864441072332e-05, "loss": 0.3896, "num_tokens": 263963969.0, "step": 1380 }, { "epoch": 0.4714115036695682, "grad_norm": 0.27532349116981625, "learning_rate": 4.6832321699544765e-05, "loss": 0.3725, "num_tokens": 264137159.0, "step": 1381 }, { "epoch": 0.471752858849633, "grad_norm": 0.268267605480963, "learning_rate": 4.682599898836622e-05, "loss": 0.3983, "num_tokens": 264313088.0, "step": 1382 }, { "epoch": 0.4720942140296979, "grad_norm": 0.25149414442076423, "learning_rate": 4.6819676277187665e-05, "loss": 0.3504, "num_tokens": 264466826.0, "step": 1383 }, { "epoch": 0.47243556920976276, "grad_norm": 0.2602404613070221, "learning_rate": 4.6813353566009106e-05, "loss": 0.3872, "num_tokens": 264685793.0, "step": 1384 }, { "epoch": 0.4727769243898276, "grad_norm": 0.26690665814314884, "learning_rate": 4.680703085483055e-05, "loss": 0.4208, "num_tokens": 264900969.0, "step": 1385 }, { "epoch": 0.4731182795698925, "grad_norm": 0.23433662880339334, "learning_rate": 4.6800708143652e-05, "loss": 0.36, "num_tokens": 265071925.0, "step": 1386 }, { "epoch": 0.47345963474995734, "grad_norm": 0.2639833055064802, "learning_rate": 4.679438543247345e-05, "loss": 0.3905, "num_tokens": 265260517.0, "step": 1387 }, { "epoch": 0.4738009899300222, "grad_norm": 0.27076168773460463, "learning_rate": 4.6788062721294894e-05, "loss": 0.3942, "num_tokens": 265442061.0, "step": 1388 }, { "epoch": 0.47414234511008707, "grad_norm": 0.2525452047886786, "learning_rate": 4.678174001011634e-05, "loss": 0.3963, "num_tokens": 265653122.0, "step": 1389 }, { "epoch": 0.4744837002901519, "grad_norm": 0.24108644546501626, "learning_rate": 4.677541729893779e-05, "loss": 0.3758, "num_tokens": 265873553.0, "step": 1390 }, { "epoch": 0.47482505547021675, "grad_norm": 0.4795989794740037, "learning_rate": 4.6769094587759235e-05, "loss": 0.4127, "num_tokens": 266066503.0, "step": 1391 }, { "epoch": 0.47516641065028165, "grad_norm": 0.2849353776467303, "learning_rate": 4.676277187658068e-05, "loss": 0.3999, "num_tokens": 266230916.0, "step": 1392 }, { "epoch": 0.4755077658303465, "grad_norm": 0.24532624807788028, "learning_rate": 4.675644916540213e-05, "loss": 0.3708, "num_tokens": 266443152.0, "step": 1393 }, { "epoch": 0.4758491210104113, "grad_norm": 0.26470284512568193, "learning_rate": 4.6750126454223576e-05, "loss": 0.3566, "num_tokens": 266606920.0, "step": 1394 }, { "epoch": 0.47619047619047616, "grad_norm": 0.23465921631795889, "learning_rate": 4.674380374304502e-05, "loss": 0.3823, "num_tokens": 266820589.0, "step": 1395 }, { "epoch": 0.47653183137054106, "grad_norm": 0.27038959202228585, "learning_rate": 4.673748103186646e-05, "loss": 0.3664, "num_tokens": 267005293.0, "step": 1396 }, { "epoch": 0.4768731865506059, "grad_norm": 0.24496635687161203, "learning_rate": 4.673115832068791e-05, "loss": 0.379, "num_tokens": 267201816.0, "step": 1397 }, { "epoch": 0.47721454173067074, "grad_norm": 0.2517622645514563, "learning_rate": 4.672483560950936e-05, "loss": 0.3801, "num_tokens": 267379928.0, "step": 1398 }, { "epoch": 0.47755589691073563, "grad_norm": 0.2663748440583196, "learning_rate": 4.671851289833081e-05, "loss": 0.4022, "num_tokens": 267575167.0, "step": 1399 }, { "epoch": 0.4778972520908005, "grad_norm": 0.24307222661025674, "learning_rate": 4.671219018715226e-05, "loss": 0.3663, "num_tokens": 267768464.0, "step": 1400 }, { "epoch": 0.4782386072708653, "grad_norm": 0.26049687201808297, "learning_rate": 4.6705867475973704e-05, "loss": 0.4069, "num_tokens": 267950350.0, "step": 1401 }, { "epoch": 0.4785799624509302, "grad_norm": 0.25368695707419237, "learning_rate": 4.6699544764795145e-05, "loss": 0.393, "num_tokens": 268148635.0, "step": 1402 }, { "epoch": 0.47892131763099505, "grad_norm": 0.23743594988771555, "learning_rate": 4.669322205361659e-05, "loss": 0.353, "num_tokens": 268333761.0, "step": 1403 }, { "epoch": 0.4792626728110599, "grad_norm": 0.27547891852355094, "learning_rate": 4.668689934243804e-05, "loss": 0.4009, "num_tokens": 268530311.0, "step": 1404 }, { "epoch": 0.4796040279911248, "grad_norm": 0.23142724688827068, "learning_rate": 4.6680576631259486e-05, "loss": 0.3892, "num_tokens": 268742699.0, "step": 1405 }, { "epoch": 0.4799453831711896, "grad_norm": 0.2611397488586379, "learning_rate": 4.667425392008093e-05, "loss": 0.3993, "num_tokens": 268944198.0, "step": 1406 }, { "epoch": 0.48028673835125446, "grad_norm": 0.23372353963312933, "learning_rate": 4.666793120890238e-05, "loss": 0.4044, "num_tokens": 269184288.0, "step": 1407 }, { "epoch": 0.48062809353131936, "grad_norm": 0.27456399446942475, "learning_rate": 4.6661608497723826e-05, "loss": 0.3598, "num_tokens": 269328230.0, "step": 1408 }, { "epoch": 0.4809694487113842, "grad_norm": 0.28113483857729527, "learning_rate": 4.6655285786545273e-05, "loss": 0.3995, "num_tokens": 269496742.0, "step": 1409 }, { "epoch": 0.48131080389144903, "grad_norm": 0.24182670323937763, "learning_rate": 4.664896307536672e-05, "loss": 0.3788, "num_tokens": 269701280.0, "step": 1410 }, { "epoch": 0.48165215907151393, "grad_norm": 0.2487615362292684, "learning_rate": 4.664264036418817e-05, "loss": 0.3487, "num_tokens": 269893766.0, "step": 1411 }, { "epoch": 0.48199351425157877, "grad_norm": 0.2565051335711258, "learning_rate": 4.6636317653009614e-05, "loss": 0.4168, "num_tokens": 270105263.0, "step": 1412 }, { "epoch": 0.4823348694316436, "grad_norm": 0.2555651776889327, "learning_rate": 4.6629994941831055e-05, "loss": 0.3973, "num_tokens": 270283018.0, "step": 1413 }, { "epoch": 0.4826762246117085, "grad_norm": 0.7594200997272498, "learning_rate": 4.66236722306525e-05, "loss": 0.3521, "num_tokens": 270490790.0, "step": 1414 }, { "epoch": 0.48301757979177334, "grad_norm": 0.3142457892088676, "learning_rate": 4.661734951947395e-05, "loss": 0.434, "num_tokens": 270688264.0, "step": 1415 }, { "epoch": 0.4833589349718382, "grad_norm": 0.2638167626001933, "learning_rate": 4.66110268082954e-05, "loss": 0.3898, "num_tokens": 270869010.0, "step": 1416 }, { "epoch": 0.4837002901519031, "grad_norm": 0.2787942651426001, "learning_rate": 4.660470409711685e-05, "loss": 0.4113, "num_tokens": 271078593.0, "step": 1417 }, { "epoch": 0.4840416453319679, "grad_norm": 0.3268002365291619, "learning_rate": 4.6598381385938296e-05, "loss": 0.3846, "num_tokens": 271261235.0, "step": 1418 }, { "epoch": 0.48438300051203276, "grad_norm": 0.2565065493528764, "learning_rate": 4.6592058674759737e-05, "loss": 0.36, "num_tokens": 271434089.0, "step": 1419 }, { "epoch": 0.48472435569209765, "grad_norm": 0.24835289645730116, "learning_rate": 4.6585735963581184e-05, "loss": 0.3951, "num_tokens": 271640855.0, "step": 1420 }, { "epoch": 0.4850657108721625, "grad_norm": 0.28019019982875465, "learning_rate": 4.657941325240263e-05, "loss": 0.3877, "num_tokens": 271806986.0, "step": 1421 }, { "epoch": 0.48540706605222733, "grad_norm": 0.25466667797056775, "learning_rate": 4.657309054122408e-05, "loss": 0.4177, "num_tokens": 272057583.0, "step": 1422 }, { "epoch": 0.4857484212322922, "grad_norm": 0.2451053295397321, "learning_rate": 4.6566767830045524e-05, "loss": 0.3957, "num_tokens": 272284872.0, "step": 1423 }, { "epoch": 0.48608977641235707, "grad_norm": 0.24707514152417334, "learning_rate": 4.656044511886697e-05, "loss": 0.354, "num_tokens": 272465620.0, "step": 1424 }, { "epoch": 0.4864311315924219, "grad_norm": 0.26883205070715316, "learning_rate": 4.655412240768842e-05, "loss": 0.3818, "num_tokens": 272675516.0, "step": 1425 }, { "epoch": 0.48677248677248675, "grad_norm": 0.27963845807571364, "learning_rate": 4.6547799696509865e-05, "loss": 0.4031, "num_tokens": 272826917.0, "step": 1426 }, { "epoch": 0.48711384195255164, "grad_norm": 0.28512606537553803, "learning_rate": 4.654147698533131e-05, "loss": 0.4357, "num_tokens": 273028319.0, "step": 1427 }, { "epoch": 0.4874551971326165, "grad_norm": 0.25189645531585153, "learning_rate": 4.653515427415276e-05, "loss": 0.3537, "num_tokens": 273262215.0, "step": 1428 }, { "epoch": 0.4877965523126813, "grad_norm": 0.23660213204841812, "learning_rate": 4.6528831562974206e-05, "loss": 0.3784, "num_tokens": 273476895.0, "step": 1429 }, { "epoch": 0.4881379074927462, "grad_norm": 0.29953518971064547, "learning_rate": 4.652250885179565e-05, "loss": 0.4119, "num_tokens": 273651315.0, "step": 1430 }, { "epoch": 0.48847926267281105, "grad_norm": 0.23537328131894922, "learning_rate": 4.6516186140617094e-05, "loss": 0.3795, "num_tokens": 273862848.0, "step": 1431 }, { "epoch": 0.4888206178528759, "grad_norm": 0.29048357373356976, "learning_rate": 4.650986342943854e-05, "loss": 0.3907, "num_tokens": 274071179.0, "step": 1432 }, { "epoch": 0.4891619730329408, "grad_norm": 0.2479640523990853, "learning_rate": 4.6503540718259994e-05, "loss": 0.3672, "num_tokens": 274232366.0, "step": 1433 }, { "epoch": 0.48950332821300563, "grad_norm": 0.23204409808874668, "learning_rate": 4.649721800708144e-05, "loss": 0.349, "num_tokens": 274413252.0, "step": 1434 }, { "epoch": 0.48984468339307047, "grad_norm": 0.26582821553017855, "learning_rate": 4.649089529590289e-05, "loss": 0.4113, "num_tokens": 274598941.0, "step": 1435 }, { "epoch": 0.49018603857313536, "grad_norm": 0.2773643439802325, "learning_rate": 4.6484572584724335e-05, "loss": 0.3998, "num_tokens": 274787537.0, "step": 1436 }, { "epoch": 0.4905273937532002, "grad_norm": 0.27386151984165624, "learning_rate": 4.6478249873545775e-05, "loss": 0.4216, "num_tokens": 274971869.0, "step": 1437 }, { "epoch": 0.49086874893326504, "grad_norm": 0.24367835586714168, "learning_rate": 4.647192716236722e-05, "loss": 0.3429, "num_tokens": 275167454.0, "step": 1438 }, { "epoch": 0.49121010411332994, "grad_norm": 0.254343480550134, "learning_rate": 4.646560445118867e-05, "loss": 0.3967, "num_tokens": 275334490.0, "step": 1439 }, { "epoch": 0.4915514592933948, "grad_norm": 0.2780494354357428, "learning_rate": 4.6459281740010116e-05, "loss": 0.4323, "num_tokens": 275516422.0, "step": 1440 }, { "epoch": 0.4918928144734596, "grad_norm": 0.2812354515607187, "learning_rate": 4.645295902883156e-05, "loss": 0.3957, "num_tokens": 275719562.0, "step": 1441 }, { "epoch": 0.4922341696535245, "grad_norm": 0.2630721806690902, "learning_rate": 4.644663631765302e-05, "loss": 0.3858, "num_tokens": 275874521.0, "step": 1442 }, { "epoch": 0.49257552483358935, "grad_norm": 0.22408802773992997, "learning_rate": 4.644031360647446e-05, "loss": 0.3359, "num_tokens": 276058131.0, "step": 1443 }, { "epoch": 0.4929168800136542, "grad_norm": 0.25195974184238923, "learning_rate": 4.6433990895295904e-05, "loss": 0.3665, "num_tokens": 276220623.0, "step": 1444 }, { "epoch": 0.4932582351937191, "grad_norm": 0.2720711275221185, "learning_rate": 4.642766818411735e-05, "loss": 0.3689, "num_tokens": 276382628.0, "step": 1445 }, { "epoch": 0.4935995903737839, "grad_norm": 0.27306859122972205, "learning_rate": 4.64213454729388e-05, "loss": 0.3921, "num_tokens": 276544864.0, "step": 1446 }, { "epoch": 0.49394094555384876, "grad_norm": 0.25107928855778966, "learning_rate": 4.6415022761760245e-05, "loss": 0.3511, "num_tokens": 276727866.0, "step": 1447 }, { "epoch": 0.49428230073391366, "grad_norm": 0.2368619198998912, "learning_rate": 4.640870005058169e-05, "loss": 0.361, "num_tokens": 276932050.0, "step": 1448 }, { "epoch": 0.4946236559139785, "grad_norm": 0.24354759868794815, "learning_rate": 4.640237733940313e-05, "loss": 0.3478, "num_tokens": 277143821.0, "step": 1449 }, { "epoch": 0.49496501109404334, "grad_norm": 0.2612888474503006, "learning_rate": 4.6396054628224586e-05, "loss": 0.3955, "num_tokens": 277347401.0, "step": 1450 }, { "epoch": 0.49530636627410823, "grad_norm": 0.26948729142309163, "learning_rate": 4.638973191704603e-05, "loss": 0.4085, "num_tokens": 277533134.0, "step": 1451 }, { "epoch": 0.4956477214541731, "grad_norm": 0.2503787169837371, "learning_rate": 4.638340920586748e-05, "loss": 0.3811, "num_tokens": 277706530.0, "step": 1452 }, { "epoch": 0.4959890766342379, "grad_norm": 0.28992084698814974, "learning_rate": 4.637708649468893e-05, "loss": 0.3807, "num_tokens": 277884123.0, "step": 1453 }, { "epoch": 0.4963304318143028, "grad_norm": 0.2654821583388445, "learning_rate": 4.6370763783510374e-05, "loss": 0.3834, "num_tokens": 278085390.0, "step": 1454 }, { "epoch": 0.49667178699436765, "grad_norm": 0.31314594625136605, "learning_rate": 4.6364441072331814e-05, "loss": 0.3789, "num_tokens": 278267509.0, "step": 1455 }, { "epoch": 0.4970131421744325, "grad_norm": 0.25465455199350123, "learning_rate": 4.635811836115326e-05, "loss": 0.3579, "num_tokens": 278431782.0, "step": 1456 }, { "epoch": 0.4973544973544973, "grad_norm": 0.26982732065494147, "learning_rate": 4.635179564997471e-05, "loss": 0.3925, "num_tokens": 278617210.0, "step": 1457 }, { "epoch": 0.4976958525345622, "grad_norm": 0.25801270308152574, "learning_rate": 4.6345472938796155e-05, "loss": 0.3836, "num_tokens": 278821422.0, "step": 1458 }, { "epoch": 0.49803720771462706, "grad_norm": 0.28181368053736183, "learning_rate": 4.633915022761761e-05, "loss": 0.3873, "num_tokens": 279007811.0, "step": 1459 }, { "epoch": 0.4983785628946919, "grad_norm": 0.2789711992903173, "learning_rate": 4.6332827516439056e-05, "loss": 0.3685, "num_tokens": 279160769.0, "step": 1460 }, { "epoch": 0.4987199180747568, "grad_norm": 0.2536585573951013, "learning_rate": 4.6326504805260496e-05, "loss": 0.374, "num_tokens": 279361027.0, "step": 1461 }, { "epoch": 0.49906127325482164, "grad_norm": 0.2618909576018957, "learning_rate": 4.632018209408194e-05, "loss": 0.3847, "num_tokens": 279562544.0, "step": 1462 }, { "epoch": 0.4994026284348865, "grad_norm": 0.22539236541607818, "learning_rate": 4.631385938290339e-05, "loss": 0.3622, "num_tokens": 279753521.0, "step": 1463 }, { "epoch": 0.49974398361495137, "grad_norm": 0.23218679031203096, "learning_rate": 4.630753667172484e-05, "loss": 0.39, "num_tokens": 279998011.0, "step": 1464 }, { "epoch": 0.5000853387950163, "grad_norm": 0.2444928269152764, "learning_rate": 4.6301213960546284e-05, "loss": 0.3681, "num_tokens": 280208386.0, "step": 1465 }, { "epoch": 0.5004266939750811, "grad_norm": 0.22726451074604778, "learning_rate": 4.629489124936773e-05, "loss": 0.3384, "num_tokens": 280381101.0, "step": 1466 }, { "epoch": 0.500768049155146, "grad_norm": 0.4391007662670566, "learning_rate": 4.628856853818918e-05, "loss": 0.3985, "num_tokens": 280528322.0, "step": 1467 }, { "epoch": 0.5011094043352108, "grad_norm": 0.24575227844420586, "learning_rate": 4.6282245827010625e-05, "loss": 0.3801, "num_tokens": 280723377.0, "step": 1468 }, { "epoch": 0.5014507595152756, "grad_norm": 0.2633855033922378, "learning_rate": 4.627592311583207e-05, "loss": 0.3784, "num_tokens": 280909681.0, "step": 1469 }, { "epoch": 0.5017921146953405, "grad_norm": 0.2352301567191628, "learning_rate": 4.626960040465352e-05, "loss": 0.3599, "num_tokens": 281115392.0, "step": 1470 }, { "epoch": 0.5021334698754054, "grad_norm": 0.2859396701871947, "learning_rate": 4.6263277693474966e-05, "loss": 0.4151, "num_tokens": 281301845.0, "step": 1471 }, { "epoch": 0.5024748250554703, "grad_norm": 0.22881193076529519, "learning_rate": 4.625695498229641e-05, "loss": 0.3635, "num_tokens": 281516087.0, "step": 1472 }, { "epoch": 0.5028161802355351, "grad_norm": 0.272231574431777, "learning_rate": 4.625063227111785e-05, "loss": 0.4086, "num_tokens": 281706887.0, "step": 1473 }, { "epoch": 0.5031575354155999, "grad_norm": 0.2458353200405557, "learning_rate": 4.62443095599393e-05, "loss": 0.3739, "num_tokens": 281940270.0, "step": 1474 }, { "epoch": 0.5034988905956648, "grad_norm": 0.24845530050745038, "learning_rate": 4.623798684876075e-05, "loss": 0.3883, "num_tokens": 282165085.0, "step": 1475 }, { "epoch": 0.5038402457757296, "grad_norm": 0.25494553204037057, "learning_rate": 4.62316641375822e-05, "loss": 0.4, "num_tokens": 282369403.0, "step": 1476 }, { "epoch": 0.5041816009557945, "grad_norm": 0.2617317233373788, "learning_rate": 4.622534142640365e-05, "loss": 0.3895, "num_tokens": 282544486.0, "step": 1477 }, { "epoch": 0.5045229561358594, "grad_norm": 0.2672171764625302, "learning_rate": 4.6219018715225095e-05, "loss": 0.376, "num_tokens": 282706701.0, "step": 1478 }, { "epoch": 0.5048643113159242, "grad_norm": 0.260057717986449, "learning_rate": 4.6212696004046535e-05, "loss": 0.3464, "num_tokens": 282888830.0, "step": 1479 }, { "epoch": 0.5052056664959891, "grad_norm": 0.2919330126683288, "learning_rate": 4.620637329286798e-05, "loss": 0.38, "num_tokens": 283114771.0, "step": 1480 }, { "epoch": 0.5055470216760539, "grad_norm": 0.22942050931931002, "learning_rate": 4.620005058168943e-05, "loss": 0.3484, "num_tokens": 283307353.0, "step": 1481 }, { "epoch": 0.5058883768561188, "grad_norm": 0.27110868714286984, "learning_rate": 4.6193727870510876e-05, "loss": 0.3952, "num_tokens": 283483069.0, "step": 1482 }, { "epoch": 0.5062297320361836, "grad_norm": 0.21376111452870106, "learning_rate": 4.618740515933232e-05, "loss": 0.3203, "num_tokens": 283675030.0, "step": 1483 }, { "epoch": 0.5065710872162486, "grad_norm": 0.24370202046907782, "learning_rate": 4.618108244815377e-05, "loss": 0.3813, "num_tokens": 283863918.0, "step": 1484 }, { "epoch": 0.5069124423963134, "grad_norm": 0.26917210405103026, "learning_rate": 4.617475973697522e-05, "loss": 0.384, "num_tokens": 284033485.0, "step": 1485 }, { "epoch": 0.5072537975763782, "grad_norm": 0.260092917733347, "learning_rate": 4.6168437025796664e-05, "loss": 0.3984, "num_tokens": 284248201.0, "step": 1486 }, { "epoch": 0.5075951527564431, "grad_norm": 0.227604941430086, "learning_rate": 4.616211431461811e-05, "loss": 0.3907, "num_tokens": 284467025.0, "step": 1487 }, { "epoch": 0.5079365079365079, "grad_norm": 0.24271448667683265, "learning_rate": 4.615579160343956e-05, "loss": 0.3692, "num_tokens": 284664791.0, "step": 1488 }, { "epoch": 0.5082778631165727, "grad_norm": 0.2524606528680385, "learning_rate": 4.6149468892261005e-05, "loss": 0.4244, "num_tokens": 284881278.0, "step": 1489 }, { "epoch": 0.5086192182966377, "grad_norm": 0.2340830063457241, "learning_rate": 4.614314618108245e-05, "loss": 0.3705, "num_tokens": 285084382.0, "step": 1490 }, { "epoch": 0.5089605734767025, "grad_norm": 0.25080166511487767, "learning_rate": 4.613682346990389e-05, "loss": 0.3852, "num_tokens": 285268430.0, "step": 1491 }, { "epoch": 0.5093019286567674, "grad_norm": 0.3132175295192029, "learning_rate": 4.613050075872534e-05, "loss": 0.3868, "num_tokens": 285469016.0, "step": 1492 }, { "epoch": 0.5096432838368322, "grad_norm": 0.25403510236882704, "learning_rate": 4.612417804754679e-05, "loss": 0.4042, "num_tokens": 285673942.0, "step": 1493 }, { "epoch": 0.5099846390168971, "grad_norm": 0.238837221851603, "learning_rate": 4.611785533636824e-05, "loss": 0.3539, "num_tokens": 285858528.0, "step": 1494 }, { "epoch": 0.5103259941969619, "grad_norm": 0.24788254181177513, "learning_rate": 4.611153262518969e-05, "loss": 0.3757, "num_tokens": 286028660.0, "step": 1495 }, { "epoch": 0.5106673493770268, "grad_norm": 0.23225260105432655, "learning_rate": 4.6105209914011134e-05, "loss": 0.3662, "num_tokens": 286221961.0, "step": 1496 }, { "epoch": 0.5110087045570917, "grad_norm": 0.27011401414157993, "learning_rate": 4.6098887202832574e-05, "loss": 0.4233, "num_tokens": 286386401.0, "step": 1497 }, { "epoch": 0.5113500597371565, "grad_norm": 0.247680098838634, "learning_rate": 4.609256449165402e-05, "loss": 0.3812, "num_tokens": 286571053.0, "step": 1498 }, { "epoch": 0.5116914149172214, "grad_norm": 0.24807216317247288, "learning_rate": 4.608624178047547e-05, "loss": 0.3909, "num_tokens": 286794565.0, "step": 1499 }, { "epoch": 0.5120327700972862, "grad_norm": 0.23189994333100405, "learning_rate": 4.6079919069296915e-05, "loss": 0.4012, "num_tokens": 287018608.0, "step": 1500 }, { "epoch": 0.512374125277351, "grad_norm": 0.3363563008605181, "learning_rate": 4.607359635811836e-05, "loss": 0.3564, "num_tokens": 287192333.0, "step": 1501 }, { "epoch": 0.512715480457416, "grad_norm": 0.23841078156216083, "learning_rate": 4.6067273646939816e-05, "loss": 0.3776, "num_tokens": 287377019.0, "step": 1502 }, { "epoch": 0.5130568356374808, "grad_norm": 0.2322228142503443, "learning_rate": 4.6060950935761256e-05, "loss": 0.359, "num_tokens": 287567627.0, "step": 1503 }, { "epoch": 0.5133981908175457, "grad_norm": 0.2546992030167577, "learning_rate": 4.60546282245827e-05, "loss": 0.3545, "num_tokens": 287751343.0, "step": 1504 }, { "epoch": 0.5137395459976105, "grad_norm": 0.25792897119804165, "learning_rate": 4.604830551340415e-05, "loss": 0.3819, "num_tokens": 287940794.0, "step": 1505 }, { "epoch": 0.5140809011776754, "grad_norm": 0.23964402243012214, "learning_rate": 4.60419828022256e-05, "loss": 0.3475, "num_tokens": 288101386.0, "step": 1506 }, { "epoch": 0.5144222563577402, "grad_norm": 0.24299337543860378, "learning_rate": 4.6035660091047044e-05, "loss": 0.3519, "num_tokens": 288285771.0, "step": 1507 }, { "epoch": 0.514763611537805, "grad_norm": 0.23854912389117558, "learning_rate": 4.602933737986849e-05, "loss": 0.3698, "num_tokens": 288495832.0, "step": 1508 }, { "epoch": 0.51510496671787, "grad_norm": 0.24672696728990032, "learning_rate": 4.602301466868993e-05, "loss": 0.4144, "num_tokens": 288729187.0, "step": 1509 }, { "epoch": 0.5154463218979348, "grad_norm": 0.2640586426362481, "learning_rate": 4.6016691957511385e-05, "loss": 0.3544, "num_tokens": 288900415.0, "step": 1510 }, { "epoch": 0.5157876770779997, "grad_norm": 0.2279766227161997, "learning_rate": 4.601036924633283e-05, "loss": 0.3566, "num_tokens": 289101593.0, "step": 1511 }, { "epoch": 0.5161290322580645, "grad_norm": 0.26325223929200176, "learning_rate": 4.600404653515428e-05, "loss": 0.3963, "num_tokens": 289276282.0, "step": 1512 }, { "epoch": 0.5164703874381293, "grad_norm": 0.24896000214728678, "learning_rate": 4.5997723823975726e-05, "loss": 0.3953, "num_tokens": 289475601.0, "step": 1513 }, { "epoch": 0.5168117426181942, "grad_norm": 0.24533188078835516, "learning_rate": 4.599140111279717e-05, "loss": 0.3661, "num_tokens": 289643133.0, "step": 1514 }, { "epoch": 0.5171530977982591, "grad_norm": 0.24607904177181297, "learning_rate": 4.598507840161861e-05, "loss": 0.3682, "num_tokens": 289827767.0, "step": 1515 }, { "epoch": 0.517494452978324, "grad_norm": 0.2585366470333219, "learning_rate": 4.597875569044006e-05, "loss": 0.3711, "num_tokens": 289999950.0, "step": 1516 }, { "epoch": 0.5178358081583888, "grad_norm": 0.2731039043624873, "learning_rate": 4.597243297926151e-05, "loss": 0.418, "num_tokens": 290172813.0, "step": 1517 }, { "epoch": 0.5181771633384537, "grad_norm": 0.25860314021737135, "learning_rate": 4.5966110268082954e-05, "loss": 0.369, "num_tokens": 290335646.0, "step": 1518 }, { "epoch": 0.5185185185185185, "grad_norm": 0.26000104652845285, "learning_rate": 4.595978755690441e-05, "loss": 0.3679, "num_tokens": 290497706.0, "step": 1519 }, { "epoch": 0.5188598736985833, "grad_norm": 0.2512202517557529, "learning_rate": 4.5953464845725855e-05, "loss": 0.3926, "num_tokens": 290693108.0, "step": 1520 }, { "epoch": 0.5192012288786483, "grad_norm": 0.2626070334372715, "learning_rate": 4.5947142134547295e-05, "loss": 0.4224, "num_tokens": 290865833.0, "step": 1521 }, { "epoch": 0.5195425840587131, "grad_norm": 0.2650336677663234, "learning_rate": 4.594081942336874e-05, "loss": 0.4138, "num_tokens": 291059486.0, "step": 1522 }, { "epoch": 0.519883939238778, "grad_norm": 0.30714983283008096, "learning_rate": 4.593449671219019e-05, "loss": 0.3906, "num_tokens": 291221828.0, "step": 1523 }, { "epoch": 0.5202252944188428, "grad_norm": 0.2291575526811281, "learning_rate": 4.5928174001011636e-05, "loss": 0.3811, "num_tokens": 291466376.0, "step": 1524 }, { "epoch": 0.5205666495989076, "grad_norm": 0.22742665491662672, "learning_rate": 4.592185128983308e-05, "loss": 0.3617, "num_tokens": 291676275.0, "step": 1525 }, { "epoch": 0.5209080047789725, "grad_norm": 0.2317001766157761, "learning_rate": 4.591552857865453e-05, "loss": 0.3568, "num_tokens": 291865476.0, "step": 1526 }, { "epoch": 0.5212493599590374, "grad_norm": 0.2644265325695328, "learning_rate": 4.590920586747598e-05, "loss": 0.3935, "num_tokens": 292081162.0, "step": 1527 }, { "epoch": 0.5215907151391023, "grad_norm": 0.2588684852211304, "learning_rate": 4.5902883156297424e-05, "loss": 0.3604, "num_tokens": 292262519.0, "step": 1528 }, { "epoch": 0.5219320703191671, "grad_norm": 0.256968256178332, "learning_rate": 4.589656044511887e-05, "loss": 0.3977, "num_tokens": 292461882.0, "step": 1529 }, { "epoch": 0.522273425499232, "grad_norm": 0.22483673358487918, "learning_rate": 4.589023773394032e-05, "loss": 0.363, "num_tokens": 292667739.0, "step": 1530 }, { "epoch": 0.5226147806792968, "grad_norm": 0.2775235558976276, "learning_rate": 4.5883915022761765e-05, "loss": 0.4146, "num_tokens": 292844280.0, "step": 1531 }, { "epoch": 0.5229561358593616, "grad_norm": 0.23787754359344407, "learning_rate": 4.587759231158321e-05, "loss": 0.3842, "num_tokens": 293044412.0, "step": 1532 }, { "epoch": 0.5232974910394266, "grad_norm": 0.2463060268284532, "learning_rate": 4.587126960040465e-05, "loss": 0.339, "num_tokens": 293199859.0, "step": 1533 }, { "epoch": 0.5236388462194914, "grad_norm": 0.2722845830621169, "learning_rate": 4.58649468892261e-05, "loss": 0.407, "num_tokens": 293368064.0, "step": 1534 }, { "epoch": 0.5239802013995563, "grad_norm": 0.25831077141724484, "learning_rate": 4.5858624178047546e-05, "loss": 0.3729, "num_tokens": 293538158.0, "step": 1535 }, { "epoch": 0.5243215565796211, "grad_norm": 0.2719612401148447, "learning_rate": 4.5852301466869e-05, "loss": 0.3944, "num_tokens": 293751272.0, "step": 1536 }, { "epoch": 0.5246629117596859, "grad_norm": 0.25862676721388467, "learning_rate": 4.584597875569045e-05, "loss": 0.3422, "num_tokens": 293954278.0, "step": 1537 }, { "epoch": 0.5250042669397508, "grad_norm": 0.2661939167065671, "learning_rate": 4.5839656044511894e-05, "loss": 0.4033, "num_tokens": 294158681.0, "step": 1538 }, { "epoch": 0.5253456221198156, "grad_norm": 0.23054345484433894, "learning_rate": 4.5833333333333334e-05, "loss": 0.3616, "num_tokens": 294339588.0, "step": 1539 }, { "epoch": 0.5256869772998806, "grad_norm": 0.26406097911646664, "learning_rate": 4.582701062215478e-05, "loss": 0.3707, "num_tokens": 294508396.0, "step": 1540 }, { "epoch": 0.5260283324799454, "grad_norm": 0.28055625641842663, "learning_rate": 4.582068791097623e-05, "loss": 0.3932, "num_tokens": 294725078.0, "step": 1541 }, { "epoch": 0.5263696876600102, "grad_norm": 0.2468709423633231, "learning_rate": 4.5814365199797675e-05, "loss": 0.3835, "num_tokens": 294920894.0, "step": 1542 }, { "epoch": 0.5267110428400751, "grad_norm": 0.2823115632078792, "learning_rate": 4.580804248861912e-05, "loss": 0.388, "num_tokens": 295103937.0, "step": 1543 }, { "epoch": 0.5270523980201399, "grad_norm": 0.2647925864862645, "learning_rate": 4.580171977744057e-05, "loss": 0.3735, "num_tokens": 295285166.0, "step": 1544 }, { "epoch": 0.5273937532002048, "grad_norm": 0.31372521342982934, "learning_rate": 4.5795397066262016e-05, "loss": 0.3691, "num_tokens": 295475200.0, "step": 1545 }, { "epoch": 0.5277351083802697, "grad_norm": 0.2673288540793705, "learning_rate": 4.578907435508346e-05, "loss": 0.3402, "num_tokens": 295628138.0, "step": 1546 }, { "epoch": 0.5280764635603346, "grad_norm": 0.2487941597967046, "learning_rate": 4.578275164390491e-05, "loss": 0.3657, "num_tokens": 295815893.0, "step": 1547 }, { "epoch": 0.5284178187403994, "grad_norm": 0.2675469504757164, "learning_rate": 4.577642893272636e-05, "loss": 0.4087, "num_tokens": 296023135.0, "step": 1548 }, { "epoch": 0.5287591739204642, "grad_norm": 0.2539838984466555, "learning_rate": 4.5770106221547804e-05, "loss": 0.3944, "num_tokens": 296222351.0, "step": 1549 }, { "epoch": 0.5291005291005291, "grad_norm": 0.24991561302854104, "learning_rate": 4.5763783510369244e-05, "loss": 0.3816, "num_tokens": 296415815.0, "step": 1550 }, { "epoch": 0.5294418842805939, "grad_norm": 0.2567979659057442, "learning_rate": 4.575746079919069e-05, "loss": 0.3792, "num_tokens": 296602976.0, "step": 1551 }, { "epoch": 0.5297832394606589, "grad_norm": 0.2629725568244225, "learning_rate": 4.575113808801214e-05, "loss": 0.401, "num_tokens": 296813925.0, "step": 1552 }, { "epoch": 0.5301245946407237, "grad_norm": 0.23639338458128795, "learning_rate": 4.574481537683359e-05, "loss": 0.4078, "num_tokens": 297062004.0, "step": 1553 }, { "epoch": 0.5304659498207885, "grad_norm": 0.2552219445355419, "learning_rate": 4.573849266565504e-05, "loss": 0.3763, "num_tokens": 297250569.0, "step": 1554 }, { "epoch": 0.5308073050008534, "grad_norm": 0.2721179066717326, "learning_rate": 4.5732169954476486e-05, "loss": 0.3743, "num_tokens": 297449716.0, "step": 1555 }, { "epoch": 0.5311486601809182, "grad_norm": 0.27530752654458324, "learning_rate": 4.5725847243297926e-05, "loss": 0.3453, "num_tokens": 297601036.0, "step": 1556 }, { "epoch": 0.5314900153609831, "grad_norm": 0.2563128143914109, "learning_rate": 4.571952453211937e-05, "loss": 0.4006, "num_tokens": 297769269.0, "step": 1557 }, { "epoch": 0.531831370541048, "grad_norm": 0.26174241322194414, "learning_rate": 4.571320182094082e-05, "loss": 0.3783, "num_tokens": 297965572.0, "step": 1558 }, { "epoch": 0.5321727257211128, "grad_norm": 0.2767952315728813, "learning_rate": 4.570687910976227e-05, "loss": 0.3774, "num_tokens": 298119312.0, "step": 1559 }, { "epoch": 0.5325140809011777, "grad_norm": 0.25203733912709214, "learning_rate": 4.5700556398583714e-05, "loss": 0.3845, "num_tokens": 298298063.0, "step": 1560 }, { "epoch": 0.5328554360812425, "grad_norm": 0.24381163387564755, "learning_rate": 4.569423368740516e-05, "loss": 0.3558, "num_tokens": 298504184.0, "step": 1561 }, { "epoch": 0.5331967912613074, "grad_norm": 0.2739028985278158, "learning_rate": 4.568791097622661e-05, "loss": 0.4009, "num_tokens": 298701137.0, "step": 1562 }, { "epoch": 0.5335381464413722, "grad_norm": 0.24064048262993926, "learning_rate": 4.5681588265048055e-05, "loss": 0.3788, "num_tokens": 298880312.0, "step": 1563 }, { "epoch": 0.5338795016214372, "grad_norm": 0.2612556081658141, "learning_rate": 4.56752655538695e-05, "loss": 0.3957, "num_tokens": 299083432.0, "step": 1564 }, { "epoch": 0.534220856801502, "grad_norm": 0.23449314447216316, "learning_rate": 4.566894284269095e-05, "loss": 0.3731, "num_tokens": 299293522.0, "step": 1565 }, { "epoch": 0.5345622119815668, "grad_norm": 0.23966779566678661, "learning_rate": 4.5662620131512396e-05, "loss": 0.4122, "num_tokens": 299521572.0, "step": 1566 }, { "epoch": 0.5349035671616317, "grad_norm": 0.24561744267417213, "learning_rate": 4.565629742033384e-05, "loss": 0.3672, "num_tokens": 299715554.0, "step": 1567 }, { "epoch": 0.5352449223416965, "grad_norm": 0.2707346007607687, "learning_rate": 4.564997470915528e-05, "loss": 0.3679, "num_tokens": 299865500.0, "step": 1568 }, { "epoch": 0.5355862775217614, "grad_norm": 0.24564498588899775, "learning_rate": 4.564365199797673e-05, "loss": 0.3585, "num_tokens": 300038595.0, "step": 1569 }, { "epoch": 0.5359276327018262, "grad_norm": 0.22821254760541607, "learning_rate": 4.5637329286798184e-05, "loss": 0.3513, "num_tokens": 300233500.0, "step": 1570 }, { "epoch": 0.5362689878818911, "grad_norm": 0.2933386791489699, "learning_rate": 4.563100657561963e-05, "loss": 0.388, "num_tokens": 300379403.0, "step": 1571 }, { "epoch": 0.536610343061956, "grad_norm": 0.26949729117003196, "learning_rate": 4.562468386444108e-05, "loss": 0.4067, "num_tokens": 300571367.0, "step": 1572 }, { "epoch": 0.5369516982420208, "grad_norm": 0.2374003617888241, "learning_rate": 4.5618361153262524e-05, "loss": 0.3369, "num_tokens": 300736624.0, "step": 1573 }, { "epoch": 0.5372930534220857, "grad_norm": 0.2451212961860912, "learning_rate": 4.5612038442083965e-05, "loss": 0.3884, "num_tokens": 300901967.0, "step": 1574 }, { "epoch": 0.5376344086021505, "grad_norm": 0.2659960018765918, "learning_rate": 4.560571573090541e-05, "loss": 0.3524, "num_tokens": 301059107.0, "step": 1575 }, { "epoch": 0.5379757637822153, "grad_norm": 0.24613050594234526, "learning_rate": 4.559939301972686e-05, "loss": 0.3767, "num_tokens": 301279233.0, "step": 1576 }, { "epoch": 0.5383171189622803, "grad_norm": 0.23570076770378265, "learning_rate": 4.5593070308548306e-05, "loss": 0.3431, "num_tokens": 301438118.0, "step": 1577 }, { "epoch": 0.5386584741423451, "grad_norm": 0.2329559254932189, "learning_rate": 4.558674759736975e-05, "loss": 0.3448, "num_tokens": 301640326.0, "step": 1578 }, { "epoch": 0.53899982932241, "grad_norm": 0.23546583631575485, "learning_rate": 4.5580424886191206e-05, "loss": 0.3846, "num_tokens": 301811826.0, "step": 1579 }, { "epoch": 0.5393411845024748, "grad_norm": 0.2422983996346253, "learning_rate": 4.5574102175012647e-05, "loss": 0.3704, "num_tokens": 302006131.0, "step": 1580 }, { "epoch": 0.5396825396825397, "grad_norm": 0.2322149956275764, "learning_rate": 4.5567779463834094e-05, "loss": 0.3796, "num_tokens": 302216938.0, "step": 1581 }, { "epoch": 0.5400238948626045, "grad_norm": 1.6965455621572934, "learning_rate": 4.556145675265554e-05, "loss": 0.3848, "num_tokens": 302402330.0, "step": 1582 }, { "epoch": 0.5403652500426694, "grad_norm": 0.2863426221349968, "learning_rate": 4.555513404147699e-05, "loss": 0.39, "num_tokens": 302567756.0, "step": 1583 }, { "epoch": 0.5407066052227343, "grad_norm": 0.24333560369719612, "learning_rate": 4.5548811330298435e-05, "loss": 0.3884, "num_tokens": 302792934.0, "step": 1584 }, { "epoch": 0.5410479604027991, "grad_norm": 0.23493939647370496, "learning_rate": 4.554248861911988e-05, "loss": 0.4131, "num_tokens": 303029857.0, "step": 1585 }, { "epoch": 0.541389315582864, "grad_norm": 0.25675286026827826, "learning_rate": 4.553616590794132e-05, "loss": 0.4004, "num_tokens": 303221864.0, "step": 1586 }, { "epoch": 0.5417306707629288, "grad_norm": 0.2685566962757259, "learning_rate": 4.5529843196762775e-05, "loss": 0.3842, "num_tokens": 303388519.0, "step": 1587 }, { "epoch": 0.5420720259429936, "grad_norm": 0.291545675922633, "learning_rate": 4.552352048558422e-05, "loss": 0.4087, "num_tokens": 303538302.0, "step": 1588 }, { "epoch": 0.5424133811230586, "grad_norm": 0.22560992814632014, "learning_rate": 4.551719777440567e-05, "loss": 0.3377, "num_tokens": 303727390.0, "step": 1589 }, { "epoch": 0.5427547363031234, "grad_norm": 0.27012547349781196, "learning_rate": 4.5510875063227116e-05, "loss": 0.3688, "num_tokens": 303906007.0, "step": 1590 }, { "epoch": 0.5430960914831883, "grad_norm": 0.28066405229199537, "learning_rate": 4.5504552352048563e-05, "loss": 0.416, "num_tokens": 304114458.0, "step": 1591 }, { "epoch": 0.5434374466632531, "grad_norm": 0.2676822523732953, "learning_rate": 4.5498229640870004e-05, "loss": 0.3594, "num_tokens": 304270467.0, "step": 1592 }, { "epoch": 0.543778801843318, "grad_norm": 0.22698212892648525, "learning_rate": 4.549190692969145e-05, "loss": 0.3799, "num_tokens": 304485965.0, "step": 1593 }, { "epoch": 0.5441201570233828, "grad_norm": 0.28879450839979826, "learning_rate": 4.54855842185129e-05, "loss": 0.3904, "num_tokens": 304666745.0, "step": 1594 }, { "epoch": 0.5444615122034477, "grad_norm": 0.2517020380726552, "learning_rate": 4.5479261507334345e-05, "loss": 0.3782, "num_tokens": 304830990.0, "step": 1595 }, { "epoch": 0.5448028673835126, "grad_norm": 0.26313704889122663, "learning_rate": 4.54729387961558e-05, "loss": 0.3964, "num_tokens": 305006112.0, "step": 1596 }, { "epoch": 0.5451442225635774, "grad_norm": 0.24052382245412465, "learning_rate": 4.5466616084977245e-05, "loss": 0.378, "num_tokens": 305195232.0, "step": 1597 }, { "epoch": 0.5454855777436423, "grad_norm": 0.23705336812486574, "learning_rate": 4.5460293373798686e-05, "loss": 0.4035, "num_tokens": 305424594.0, "step": 1598 }, { "epoch": 0.5458269329237071, "grad_norm": 0.23957424633078578, "learning_rate": 4.545397066262013e-05, "loss": 0.3631, "num_tokens": 305583669.0, "step": 1599 }, { "epoch": 0.5461682881037719, "grad_norm": 0.24881917818534005, "learning_rate": 4.544764795144158e-05, "loss": 0.3523, "num_tokens": 305737479.0, "step": 1600 }, { "epoch": 0.5465096432838368, "grad_norm": 0.23741576968003814, "learning_rate": 4.5441325240263026e-05, "loss": 0.3897, "num_tokens": 305939675.0, "step": 1601 }, { "epoch": 0.5468509984639017, "grad_norm": 0.2512722569659664, "learning_rate": 4.5435002529084473e-05, "loss": 0.3831, "num_tokens": 306134943.0, "step": 1602 }, { "epoch": 0.5471923536439666, "grad_norm": 0.2437371428071746, "learning_rate": 4.542867981790592e-05, "loss": 0.3979, "num_tokens": 306356549.0, "step": 1603 }, { "epoch": 0.5475337088240314, "grad_norm": 0.28037507727309036, "learning_rate": 4.542235710672737e-05, "loss": 0.4197, "num_tokens": 306531282.0, "step": 1604 }, { "epoch": 0.5478750640040962, "grad_norm": 0.25008533896341756, "learning_rate": 4.5416034395548814e-05, "loss": 0.4144, "num_tokens": 306707529.0, "step": 1605 }, { "epoch": 0.5482164191841611, "grad_norm": 0.2502857117777451, "learning_rate": 4.540971168437026e-05, "loss": 0.4045, "num_tokens": 306896977.0, "step": 1606 }, { "epoch": 0.5485577743642259, "grad_norm": 0.2669357028171833, "learning_rate": 4.540338897319171e-05, "loss": 0.3678, "num_tokens": 307051037.0, "step": 1607 }, { "epoch": 0.5488991295442909, "grad_norm": 0.23459843269218675, "learning_rate": 4.5397066262013155e-05, "loss": 0.3721, "num_tokens": 307243389.0, "step": 1608 }, { "epoch": 0.5492404847243557, "grad_norm": 0.2425676861874413, "learning_rate": 4.53907435508346e-05, "loss": 0.3684, "num_tokens": 307442006.0, "step": 1609 }, { "epoch": 0.5495818399044206, "grad_norm": 0.2485484779514974, "learning_rate": 4.538442083965604e-05, "loss": 0.3853, "num_tokens": 307629119.0, "step": 1610 }, { "epoch": 0.5499231950844854, "grad_norm": 0.23243926652572333, "learning_rate": 4.537809812847749e-05, "loss": 0.382, "num_tokens": 307832741.0, "step": 1611 }, { "epoch": 0.5502645502645502, "grad_norm": 0.2810669725656193, "learning_rate": 4.5371775417298936e-05, "loss": 0.4312, "num_tokens": 308005129.0, "step": 1612 }, { "epoch": 0.5506059054446151, "grad_norm": 0.23899342801935183, "learning_rate": 4.536545270612039e-05, "loss": 0.3824, "num_tokens": 308197128.0, "step": 1613 }, { "epoch": 0.55094726062468, "grad_norm": 0.23556788703335982, "learning_rate": 4.535912999494184e-05, "loss": 0.369, "num_tokens": 308385920.0, "step": 1614 }, { "epoch": 0.5512886158047449, "grad_norm": 0.2627121590571999, "learning_rate": 4.5352807283763284e-05, "loss": 0.3589, "num_tokens": 308580544.0, "step": 1615 }, { "epoch": 0.5516299709848097, "grad_norm": 0.23976186636911903, "learning_rate": 4.5346484572584724e-05, "loss": 0.3795, "num_tokens": 308784644.0, "step": 1616 }, { "epoch": 0.5519713261648745, "grad_norm": 0.22427552647904384, "learning_rate": 4.534016186140617e-05, "loss": 0.3648, "num_tokens": 308994794.0, "step": 1617 }, { "epoch": 0.5523126813449394, "grad_norm": 0.24800286528180462, "learning_rate": 4.533383915022762e-05, "loss": 0.399, "num_tokens": 309218713.0, "step": 1618 }, { "epoch": 0.5526540365250042, "grad_norm": 0.2533936645501811, "learning_rate": 4.5327516439049065e-05, "loss": 0.3679, "num_tokens": 309385822.0, "step": 1619 }, { "epoch": 0.5529953917050692, "grad_norm": 0.24744531191727034, "learning_rate": 4.532119372787051e-05, "loss": 0.3507, "num_tokens": 309552372.0, "step": 1620 }, { "epoch": 0.553336746885134, "grad_norm": 0.22869938042635465, "learning_rate": 4.531487101669196e-05, "loss": 0.3651, "num_tokens": 309759070.0, "step": 1621 }, { "epoch": 0.5536781020651989, "grad_norm": 0.24116584355663395, "learning_rate": 4.5308548305513406e-05, "loss": 0.3406, "num_tokens": 309943567.0, "step": 1622 }, { "epoch": 0.5540194572452637, "grad_norm": 0.2467603016383475, "learning_rate": 4.530222559433485e-05, "loss": 0.3948, "num_tokens": 310147990.0, "step": 1623 }, { "epoch": 0.5543608124253285, "grad_norm": 0.2546221390851674, "learning_rate": 4.52959028831563e-05, "loss": 0.3677, "num_tokens": 310326019.0, "step": 1624 }, { "epoch": 0.5547021676053934, "grad_norm": 0.25180488139982016, "learning_rate": 4.528958017197775e-05, "loss": 0.3736, "num_tokens": 310493425.0, "step": 1625 }, { "epoch": 0.5550435227854583, "grad_norm": 0.23027073922774852, "learning_rate": 4.5283257460799194e-05, "loss": 0.3852, "num_tokens": 310679442.0, "step": 1626 }, { "epoch": 0.5553848779655232, "grad_norm": 0.25436952035810556, "learning_rate": 4.527693474962064e-05, "loss": 0.3994, "num_tokens": 310870072.0, "step": 1627 }, { "epoch": 0.555726233145588, "grad_norm": 0.24322655111559519, "learning_rate": 4.527061203844208e-05, "loss": 0.4001, "num_tokens": 311115764.0, "step": 1628 }, { "epoch": 0.5560675883256528, "grad_norm": 0.2385212409790268, "learning_rate": 4.526428932726353e-05, "loss": 0.3659, "num_tokens": 311299992.0, "step": 1629 }, { "epoch": 0.5564089435057177, "grad_norm": 0.25492659657019273, "learning_rate": 4.525796661608498e-05, "loss": 0.3581, "num_tokens": 311452156.0, "step": 1630 }, { "epoch": 0.5567502986857825, "grad_norm": 0.22688919080048106, "learning_rate": 4.525164390490643e-05, "loss": 0.3593, "num_tokens": 311627081.0, "step": 1631 }, { "epoch": 0.5570916538658475, "grad_norm": 0.2523857402679703, "learning_rate": 4.5245321193727876e-05, "loss": 0.4055, "num_tokens": 311855547.0, "step": 1632 }, { "epoch": 0.5574330090459123, "grad_norm": 0.27867111280611917, "learning_rate": 4.523899848254932e-05, "loss": 0.3934, "num_tokens": 312030222.0, "step": 1633 }, { "epoch": 0.5577743642259771, "grad_norm": 0.26461205970949775, "learning_rate": 4.523267577137076e-05, "loss": 0.4027, "num_tokens": 312194193.0, "step": 1634 }, { "epoch": 0.558115719406042, "grad_norm": 0.2648694750312365, "learning_rate": 4.522635306019221e-05, "loss": 0.3811, "num_tokens": 312349101.0, "step": 1635 }, { "epoch": 0.5584570745861068, "grad_norm": 0.2597893196768454, "learning_rate": 4.522003034901366e-05, "loss": 0.3656, "num_tokens": 312525506.0, "step": 1636 }, { "epoch": 0.5587984297661717, "grad_norm": 0.24008453102501512, "learning_rate": 4.5213707637835104e-05, "loss": 0.345, "num_tokens": 312691786.0, "step": 1637 }, { "epoch": 0.5591397849462365, "grad_norm": 0.2502454236243191, "learning_rate": 4.520738492665655e-05, "loss": 0.3971, "num_tokens": 312913187.0, "step": 1638 }, { "epoch": 0.5594811401263015, "grad_norm": 0.25893759550017814, "learning_rate": 4.5201062215478005e-05, "loss": 0.3706, "num_tokens": 313099973.0, "step": 1639 }, { "epoch": 0.5598224953063663, "grad_norm": 0.2261457039695871, "learning_rate": 4.5194739504299445e-05, "loss": 0.3692, "num_tokens": 313314137.0, "step": 1640 }, { "epoch": 0.5601638504864311, "grad_norm": 0.24688724685461289, "learning_rate": 4.518841679312089e-05, "loss": 0.3785, "num_tokens": 313511161.0, "step": 1641 }, { "epoch": 0.560505205666496, "grad_norm": 0.2278908790515456, "learning_rate": 4.518209408194234e-05, "loss": 0.3498, "num_tokens": 313741127.0, "step": 1642 }, { "epoch": 0.5608465608465608, "grad_norm": 0.26737710846931323, "learning_rate": 4.5175771370763786e-05, "loss": 0.3843, "num_tokens": 313913055.0, "step": 1643 }, { "epoch": 0.5611879160266257, "grad_norm": 0.23721710452234923, "learning_rate": 4.516944865958523e-05, "loss": 0.3942, "num_tokens": 314117741.0, "step": 1644 }, { "epoch": 0.5615292712066906, "grad_norm": 0.26638227067863046, "learning_rate": 4.516312594840668e-05, "loss": 0.3771, "num_tokens": 314273954.0, "step": 1645 }, { "epoch": 0.5618706263867554, "grad_norm": 0.23170703552405522, "learning_rate": 4.515680323722812e-05, "loss": 0.3954, "num_tokens": 314488885.0, "step": 1646 }, { "epoch": 0.5622119815668203, "grad_norm": 0.21814531390482714, "learning_rate": 4.5150480526049574e-05, "loss": 0.3408, "num_tokens": 314694790.0, "step": 1647 }, { "epoch": 0.5625533367468851, "grad_norm": 0.24409238754283444, "learning_rate": 4.514415781487102e-05, "loss": 0.3709, "num_tokens": 314875522.0, "step": 1648 }, { "epoch": 0.56289469192695, "grad_norm": 0.2399227581212656, "learning_rate": 4.513783510369247e-05, "loss": 0.3707, "num_tokens": 315085496.0, "step": 1649 }, { "epoch": 0.5632360471070148, "grad_norm": 0.22630330535488716, "learning_rate": 4.5131512392513915e-05, "loss": 0.3605, "num_tokens": 315277408.0, "step": 1650 }, { "epoch": 0.5635774022870798, "grad_norm": 0.24956752464442594, "learning_rate": 4.512518968133536e-05, "loss": 0.4131, "num_tokens": 315472779.0, "step": 1651 }, { "epoch": 0.5639187574671446, "grad_norm": 0.24485843161739995, "learning_rate": 4.51188669701568e-05, "loss": 0.4038, "num_tokens": 315646574.0, "step": 1652 }, { "epoch": 0.5642601126472094, "grad_norm": 0.24490004115621425, "learning_rate": 4.511254425897825e-05, "loss": 0.4109, "num_tokens": 315848618.0, "step": 1653 }, { "epoch": 0.5646014678272743, "grad_norm": 0.24217834999599708, "learning_rate": 4.5106221547799696e-05, "loss": 0.3757, "num_tokens": 316065777.0, "step": 1654 }, { "epoch": 0.5649428230073391, "grad_norm": 0.21748260562758534, "learning_rate": 4.509989883662114e-05, "loss": 0.3753, "num_tokens": 316275394.0, "step": 1655 }, { "epoch": 0.565284178187404, "grad_norm": 0.25066341404621506, "learning_rate": 4.50935761254426e-05, "loss": 0.3703, "num_tokens": 316453288.0, "step": 1656 }, { "epoch": 0.5656255333674689, "grad_norm": 0.24821417571658605, "learning_rate": 4.5087253414264044e-05, "loss": 0.3748, "num_tokens": 316635511.0, "step": 1657 }, { "epoch": 0.5659668885475337, "grad_norm": 0.24869205971404837, "learning_rate": 4.5080930703085484e-05, "loss": 0.3665, "num_tokens": 316780942.0, "step": 1658 }, { "epoch": 0.5663082437275986, "grad_norm": 0.2541450499931133, "learning_rate": 4.507460799190693e-05, "loss": 0.4027, "num_tokens": 316962906.0, "step": 1659 }, { "epoch": 0.5666495989076634, "grad_norm": 0.26528258658789455, "learning_rate": 4.506828528072838e-05, "loss": 0.3439, "num_tokens": 317112451.0, "step": 1660 }, { "epoch": 0.5669909540877283, "grad_norm": 0.24861743412799778, "learning_rate": 4.5061962569549825e-05, "loss": 0.4379, "num_tokens": 317333321.0, "step": 1661 }, { "epoch": 0.5673323092677931, "grad_norm": 0.2640704838079268, "learning_rate": 4.505563985837127e-05, "loss": 0.4071, "num_tokens": 317508984.0, "step": 1662 }, { "epoch": 0.567673664447858, "grad_norm": 0.2374379416043883, "learning_rate": 4.504931714719272e-05, "loss": 0.3346, "num_tokens": 317692543.0, "step": 1663 }, { "epoch": 0.5680150196279229, "grad_norm": 0.2414870569954817, "learning_rate": 4.5042994436014166e-05, "loss": 0.3573, "num_tokens": 317892697.0, "step": 1664 }, { "epoch": 0.5683563748079877, "grad_norm": 0.24449453138553376, "learning_rate": 4.503667172483561e-05, "loss": 0.3644, "num_tokens": 318113668.0, "step": 1665 }, { "epoch": 0.5686977299880526, "grad_norm": 0.24017234722218608, "learning_rate": 4.503034901365706e-05, "loss": 0.3768, "num_tokens": 318340787.0, "step": 1666 }, { "epoch": 0.5690390851681174, "grad_norm": 0.2641304604949197, "learning_rate": 4.502402630247851e-05, "loss": 0.4005, "num_tokens": 318504271.0, "step": 1667 }, { "epoch": 0.5693804403481822, "grad_norm": 0.24541413248887253, "learning_rate": 4.5017703591299954e-05, "loss": 0.4016, "num_tokens": 318736848.0, "step": 1668 }, { "epoch": 0.5697217955282471, "grad_norm": 0.24072517137162933, "learning_rate": 4.50113808801214e-05, "loss": 0.3491, "num_tokens": 318922467.0, "step": 1669 }, { "epoch": 0.570063150708312, "grad_norm": 0.24711421792546084, "learning_rate": 4.500505816894284e-05, "loss": 0.3514, "num_tokens": 319091437.0, "step": 1670 }, { "epoch": 0.5704045058883769, "grad_norm": 0.25504962392675895, "learning_rate": 4.499873545776429e-05, "loss": 0.3704, "num_tokens": 319235163.0, "step": 1671 }, { "epoch": 0.5707458610684417, "grad_norm": 0.26890379372843703, "learning_rate": 4.4992412746585735e-05, "loss": 0.3846, "num_tokens": 319397285.0, "step": 1672 }, { "epoch": 0.5710872162485066, "grad_norm": 0.23307098089248834, "learning_rate": 4.498609003540719e-05, "loss": 0.4023, "num_tokens": 319628780.0, "step": 1673 }, { "epoch": 0.5714285714285714, "grad_norm": 0.22305965819971135, "learning_rate": 4.4979767324228636e-05, "loss": 0.3648, "num_tokens": 319845767.0, "step": 1674 }, { "epoch": 0.5717699266086362, "grad_norm": 0.2798496519948894, "learning_rate": 4.4973444613050076e-05, "loss": 0.3792, "num_tokens": 320042595.0, "step": 1675 }, { "epoch": 0.5721112817887012, "grad_norm": 0.2621970924230319, "learning_rate": 4.496712190187152e-05, "loss": 0.4394, "num_tokens": 320234547.0, "step": 1676 }, { "epoch": 0.572452636968766, "grad_norm": 0.22907737870342107, "learning_rate": 4.496079919069297e-05, "loss": 0.3606, "num_tokens": 320427753.0, "step": 1677 }, { "epoch": 0.5727939921488309, "grad_norm": 0.24149196923653005, "learning_rate": 4.495447647951442e-05, "loss": 0.3773, "num_tokens": 320628541.0, "step": 1678 }, { "epoch": 0.5731353473288957, "grad_norm": 0.222177596175466, "learning_rate": 4.4948153768335864e-05, "loss": 0.3672, "num_tokens": 320827831.0, "step": 1679 }, { "epoch": 0.5734767025089605, "grad_norm": 0.24237103326478865, "learning_rate": 4.494183105715731e-05, "loss": 0.3573, "num_tokens": 320993552.0, "step": 1680 }, { "epoch": 0.5738180576890254, "grad_norm": 0.25126281937238043, "learning_rate": 4.493550834597876e-05, "loss": 0.3875, "num_tokens": 321174555.0, "step": 1681 }, { "epoch": 0.5741594128690903, "grad_norm": 0.23546022718989162, "learning_rate": 4.4929185634800205e-05, "loss": 0.3555, "num_tokens": 321363286.0, "step": 1682 }, { "epoch": 0.5745007680491552, "grad_norm": 0.2313171262536963, "learning_rate": 4.492286292362165e-05, "loss": 0.3644, "num_tokens": 321558303.0, "step": 1683 }, { "epoch": 0.57484212322922, "grad_norm": 0.2610467419953957, "learning_rate": 4.49165402124431e-05, "loss": 0.3618, "num_tokens": 321721785.0, "step": 1684 }, { "epoch": 0.5751834784092849, "grad_norm": 0.25051371189749694, "learning_rate": 4.4910217501264546e-05, "loss": 0.3878, "num_tokens": 321909253.0, "step": 1685 }, { "epoch": 0.5755248335893497, "grad_norm": 0.2732363264482045, "learning_rate": 4.490389479008599e-05, "loss": 0.3786, "num_tokens": 322078448.0, "step": 1686 }, { "epoch": 0.5758661887694145, "grad_norm": 0.2219372964411493, "learning_rate": 4.489757207890743e-05, "loss": 0.3613, "num_tokens": 322281268.0, "step": 1687 }, { "epoch": 0.5762075439494795, "grad_norm": 0.24433838076371486, "learning_rate": 4.489124936772888e-05, "loss": 0.3873, "num_tokens": 322490956.0, "step": 1688 }, { "epoch": 0.5765488991295443, "grad_norm": 0.2474119836198248, "learning_rate": 4.488492665655033e-05, "loss": 0.406, "num_tokens": 322698705.0, "step": 1689 }, { "epoch": 0.5768902543096092, "grad_norm": 0.24425840801361345, "learning_rate": 4.487860394537178e-05, "loss": 0.3671, "num_tokens": 322881204.0, "step": 1690 }, { "epoch": 0.577231609489674, "grad_norm": 0.5997673372077008, "learning_rate": 4.487228123419323e-05, "loss": 0.3752, "num_tokens": 323051180.0, "step": 1691 }, { "epoch": 0.5775729646697388, "grad_norm": 0.24833208120338046, "learning_rate": 4.4865958523014675e-05, "loss": 0.3549, "num_tokens": 323229750.0, "step": 1692 }, { "epoch": 0.5779143198498037, "grad_norm": 0.24570352742818571, "learning_rate": 4.4859635811836115e-05, "loss": 0.3885, "num_tokens": 323425361.0, "step": 1693 }, { "epoch": 0.5782556750298686, "grad_norm": 0.2525199488800816, "learning_rate": 4.485331310065756e-05, "loss": 0.3717, "num_tokens": 323598166.0, "step": 1694 }, { "epoch": 0.5785970302099335, "grad_norm": 0.2458689463914115, "learning_rate": 4.484699038947901e-05, "loss": 0.4009, "num_tokens": 323801584.0, "step": 1695 }, { "epoch": 0.5789383853899983, "grad_norm": 0.25118371521078464, "learning_rate": 4.4840667678300456e-05, "loss": 0.403, "num_tokens": 323997192.0, "step": 1696 }, { "epoch": 0.5792797405700632, "grad_norm": 0.2587342941711323, "learning_rate": 4.48343449671219e-05, "loss": 0.3903, "num_tokens": 324183577.0, "step": 1697 }, { "epoch": 0.579621095750128, "grad_norm": 0.25163329065105877, "learning_rate": 4.482802225594335e-05, "loss": 0.3635, "num_tokens": 324346837.0, "step": 1698 }, { "epoch": 0.5799624509301928, "grad_norm": 0.26259731694256294, "learning_rate": 4.48216995447648e-05, "loss": 0.3678, "num_tokens": 324508124.0, "step": 1699 }, { "epoch": 0.5803038061102577, "grad_norm": 0.23936983992024558, "learning_rate": 4.4815376833586244e-05, "loss": 0.3638, "num_tokens": 324678245.0, "step": 1700 }, { "epoch": 0.5806451612903226, "grad_norm": 0.2531148028518057, "learning_rate": 4.480905412240769e-05, "loss": 0.3545, "num_tokens": 324833086.0, "step": 1701 }, { "epoch": 0.5809865164703875, "grad_norm": 0.22338620276181212, "learning_rate": 4.480273141122914e-05, "loss": 0.3476, "num_tokens": 325035198.0, "step": 1702 }, { "epoch": 0.5813278716504523, "grad_norm": 0.2501139163838905, "learning_rate": 4.4796408700050585e-05, "loss": 0.3517, "num_tokens": 325196730.0, "step": 1703 }, { "epoch": 0.5816692268305171, "grad_norm": 0.25182466250674534, "learning_rate": 4.479008598887203e-05, "loss": 0.3803, "num_tokens": 325359157.0, "step": 1704 }, { "epoch": 0.582010582010582, "grad_norm": 0.2526931342278944, "learning_rate": 4.478376327769347e-05, "loss": 0.3772, "num_tokens": 325515695.0, "step": 1705 }, { "epoch": 0.5823519371906468, "grad_norm": 0.23991299624068455, "learning_rate": 4.477744056651492e-05, "loss": 0.4008, "num_tokens": 325733458.0, "step": 1706 }, { "epoch": 0.5826932923707118, "grad_norm": 0.2394281266003869, "learning_rate": 4.477111785533637e-05, "loss": 0.3776, "num_tokens": 325926427.0, "step": 1707 }, { "epoch": 0.5830346475507766, "grad_norm": 0.2483698840744665, "learning_rate": 4.476479514415782e-05, "loss": 0.4065, "num_tokens": 326094283.0, "step": 1708 }, { "epoch": 0.5833760027308414, "grad_norm": 0.24923234945585362, "learning_rate": 4.475847243297927e-05, "loss": 0.3884, "num_tokens": 326293720.0, "step": 1709 }, { "epoch": 0.5837173579109063, "grad_norm": 0.2471545452338426, "learning_rate": 4.4752149721800714e-05, "loss": 0.3829, "num_tokens": 326476040.0, "step": 1710 }, { "epoch": 0.5840587130909711, "grad_norm": 0.2490937381730523, "learning_rate": 4.4745827010622154e-05, "loss": 0.3875, "num_tokens": 326692704.0, "step": 1711 }, { "epoch": 0.584400068271036, "grad_norm": 0.22879100562819063, "learning_rate": 4.47395042994436e-05, "loss": 0.3554, "num_tokens": 326904522.0, "step": 1712 }, { "epoch": 0.5847414234511009, "grad_norm": 0.23992018638947338, "learning_rate": 4.473318158826505e-05, "loss": 0.4254, "num_tokens": 327144605.0, "step": 1713 }, { "epoch": 0.5850827786311658, "grad_norm": 0.2434584627807664, "learning_rate": 4.4726858877086495e-05, "loss": 0.3936, "num_tokens": 327326975.0, "step": 1714 }, { "epoch": 0.5854241338112306, "grad_norm": 0.23085084205606757, "learning_rate": 4.472053616590794e-05, "loss": 0.371, "num_tokens": 327505554.0, "step": 1715 }, { "epoch": 0.5857654889912954, "grad_norm": 0.2181675272276465, "learning_rate": 4.4714213454729396e-05, "loss": 0.3666, "num_tokens": 327710115.0, "step": 1716 }, { "epoch": 0.5861068441713603, "grad_norm": 0.2412794552282416, "learning_rate": 4.4707890743550836e-05, "loss": 0.3682, "num_tokens": 327897753.0, "step": 1717 }, { "epoch": 0.5864481993514251, "grad_norm": 0.2355341582014182, "learning_rate": 4.470156803237228e-05, "loss": 0.3458, "num_tokens": 328062678.0, "step": 1718 }, { "epoch": 0.5867895545314901, "grad_norm": 0.2349506901100779, "learning_rate": 4.469524532119373e-05, "loss": 0.3337, "num_tokens": 328214155.0, "step": 1719 }, { "epoch": 0.5871309097115549, "grad_norm": 0.2633042269432622, "learning_rate": 4.468892261001518e-05, "loss": 0.4247, "num_tokens": 328402791.0, "step": 1720 }, { "epoch": 0.5874722648916197, "grad_norm": 0.22046728596882215, "learning_rate": 4.4682599898836624e-05, "loss": 0.3837, "num_tokens": 328621071.0, "step": 1721 }, { "epoch": 0.5878136200716846, "grad_norm": 0.28692042760033604, "learning_rate": 4.467627718765807e-05, "loss": 0.3869, "num_tokens": 328799452.0, "step": 1722 }, { "epoch": 0.5881549752517494, "grad_norm": 0.2394625766819583, "learning_rate": 4.466995447647951e-05, "loss": 0.4112, "num_tokens": 329024858.0, "step": 1723 }, { "epoch": 0.5884963304318143, "grad_norm": 0.23271067014446326, "learning_rate": 4.4663631765300965e-05, "loss": 0.3913, "num_tokens": 329241747.0, "step": 1724 }, { "epoch": 0.5888376856118792, "grad_norm": 0.2355699054469138, "learning_rate": 4.465730905412241e-05, "loss": 0.3539, "num_tokens": 329438661.0, "step": 1725 }, { "epoch": 0.589179040791944, "grad_norm": 0.23834449888080028, "learning_rate": 4.465098634294386e-05, "loss": 0.3708, "num_tokens": 329635886.0, "step": 1726 }, { "epoch": 0.5895203959720089, "grad_norm": 0.23315669431246747, "learning_rate": 4.4644663631765306e-05, "loss": 0.378, "num_tokens": 329812475.0, "step": 1727 }, { "epoch": 0.5898617511520737, "grad_norm": 0.24803157573036674, "learning_rate": 4.463834092058675e-05, "loss": 0.3527, "num_tokens": 329984375.0, "step": 1728 }, { "epoch": 0.5902031063321386, "grad_norm": 0.2573351488694869, "learning_rate": 4.463201820940819e-05, "loss": 0.3984, "num_tokens": 330198980.0, "step": 1729 }, { "epoch": 0.5905444615122034, "grad_norm": 0.2203103622506034, "learning_rate": 4.462569549822964e-05, "loss": 0.357, "num_tokens": 330391566.0, "step": 1730 }, { "epoch": 0.5908858166922683, "grad_norm": 0.2116012706154942, "learning_rate": 4.461937278705109e-05, "loss": 0.3561, "num_tokens": 330605061.0, "step": 1731 }, { "epoch": 0.5912271718723332, "grad_norm": 0.23631689710742937, "learning_rate": 4.4613050075872534e-05, "loss": 0.3659, "num_tokens": 330814292.0, "step": 1732 }, { "epoch": 0.591568527052398, "grad_norm": 0.24883201299025823, "learning_rate": 4.460672736469399e-05, "loss": 0.3365, "num_tokens": 330961674.0, "step": 1733 }, { "epoch": 0.5919098822324629, "grad_norm": 0.26427547537061313, "learning_rate": 4.4600404653515435e-05, "loss": 0.352, "num_tokens": 331134765.0, "step": 1734 }, { "epoch": 0.5922512374125277, "grad_norm": 0.2252932765114706, "learning_rate": 4.4594081942336875e-05, "loss": 0.3782, "num_tokens": 331363808.0, "step": 1735 }, { "epoch": 0.5925925925925926, "grad_norm": 0.23960502002005524, "learning_rate": 4.458775923115832e-05, "loss": 0.3508, "num_tokens": 331567102.0, "step": 1736 }, { "epoch": 0.5929339477726574, "grad_norm": 0.23919748306064628, "learning_rate": 4.458143651997977e-05, "loss": 0.3619, "num_tokens": 331759630.0, "step": 1737 }, { "epoch": 0.5932753029527223, "grad_norm": 0.2690776353281173, "learning_rate": 4.4575113808801216e-05, "loss": 0.38, "num_tokens": 331892725.0, "step": 1738 }, { "epoch": 0.5936166581327872, "grad_norm": 0.22926585880472983, "learning_rate": 4.456879109762266e-05, "loss": 0.3818, "num_tokens": 332122844.0, "step": 1739 }, { "epoch": 0.593958013312852, "grad_norm": 0.24530808050646077, "learning_rate": 4.456246838644411e-05, "loss": 0.4113, "num_tokens": 332333325.0, "step": 1740 }, { "epoch": 0.5942993684929169, "grad_norm": 0.2367288514894931, "learning_rate": 4.455614567526556e-05, "loss": 0.3606, "num_tokens": 332522509.0, "step": 1741 }, { "epoch": 0.5946407236729817, "grad_norm": 0.23151744888201595, "learning_rate": 4.4549822964087004e-05, "loss": 0.3847, "num_tokens": 332717955.0, "step": 1742 }, { "epoch": 0.5949820788530465, "grad_norm": 0.24548717905245407, "learning_rate": 4.454350025290845e-05, "loss": 0.3704, "num_tokens": 332874262.0, "step": 1743 }, { "epoch": 0.5953234340331115, "grad_norm": 0.2219484790199542, "learning_rate": 4.45371775417299e-05, "loss": 0.3956, "num_tokens": 333130310.0, "step": 1744 }, { "epoch": 0.5956647892131763, "grad_norm": 0.24850694946075183, "learning_rate": 4.4530854830551345e-05, "loss": 0.4081, "num_tokens": 333304368.0, "step": 1745 }, { "epoch": 0.5960061443932412, "grad_norm": 0.20461919764145403, "learning_rate": 4.452453211937279e-05, "loss": 0.3357, "num_tokens": 333504371.0, "step": 1746 }, { "epoch": 0.596347499573306, "grad_norm": 0.22043242004487656, "learning_rate": 4.451820940819423e-05, "loss": 0.3671, "num_tokens": 333700485.0, "step": 1747 }, { "epoch": 0.5966888547533709, "grad_norm": 0.23074118365306884, "learning_rate": 4.451188669701568e-05, "loss": 0.3757, "num_tokens": 333899410.0, "step": 1748 }, { "epoch": 0.5970302099334357, "grad_norm": 0.2295737099473916, "learning_rate": 4.4505563985837126e-05, "loss": 0.3954, "num_tokens": 334109791.0, "step": 1749 }, { "epoch": 0.5973715651135006, "grad_norm": 0.2303199190837224, "learning_rate": 4.449924127465858e-05, "loss": 0.3678, "num_tokens": 334308012.0, "step": 1750 }, { "epoch": 0.5977129202935655, "grad_norm": 0.2520027063191743, "learning_rate": 4.4492918563480026e-05, "loss": 0.3538, "num_tokens": 334470566.0, "step": 1751 }, { "epoch": 0.5980542754736303, "grad_norm": 0.24052465673866227, "learning_rate": 4.4486595852301473e-05, "loss": 0.3644, "num_tokens": 334641912.0, "step": 1752 }, { "epoch": 0.5983956306536952, "grad_norm": 0.2394772751582438, "learning_rate": 4.4480273141122914e-05, "loss": 0.3972, "num_tokens": 334837218.0, "step": 1753 }, { "epoch": 0.59873698583376, "grad_norm": 0.22661107059591032, "learning_rate": 4.447395042994436e-05, "loss": 0.3577, "num_tokens": 335010068.0, "step": 1754 }, { "epoch": 0.5990783410138248, "grad_norm": 0.23154559871673477, "learning_rate": 4.446762771876581e-05, "loss": 0.3513, "num_tokens": 335193607.0, "step": 1755 }, { "epoch": 0.5994196961938898, "grad_norm": 0.21336182488365785, "learning_rate": 4.4461305007587255e-05, "loss": 0.351, "num_tokens": 335423086.0, "step": 1756 }, { "epoch": 0.5997610513739546, "grad_norm": 0.22775586289569585, "learning_rate": 4.44549822964087e-05, "loss": 0.3538, "num_tokens": 335602737.0, "step": 1757 }, { "epoch": 0.6001024065540195, "grad_norm": 0.20551770364571684, "learning_rate": 4.444865958523015e-05, "loss": 0.3334, "num_tokens": 335808455.0, "step": 1758 }, { "epoch": 0.6004437617340843, "grad_norm": 0.24283731468959935, "learning_rate": 4.4442336874051596e-05, "loss": 0.3566, "num_tokens": 335966296.0, "step": 1759 }, { "epoch": 0.6007851169141492, "grad_norm": 0.25368338234670773, "learning_rate": 4.443601416287304e-05, "loss": 0.3624, "num_tokens": 336155483.0, "step": 1760 }, { "epoch": 0.601126472094214, "grad_norm": 0.2574448043910449, "learning_rate": 4.442969145169449e-05, "loss": 0.377, "num_tokens": 336339048.0, "step": 1761 }, { "epoch": 0.6014678272742788, "grad_norm": 0.23534186222511003, "learning_rate": 4.4423368740515937e-05, "loss": 0.3567, "num_tokens": 336495632.0, "step": 1762 }, { "epoch": 0.6018091824543438, "grad_norm": 0.2527596094118896, "learning_rate": 4.4417046029337384e-05, "loss": 0.3866, "num_tokens": 336676606.0, "step": 1763 }, { "epoch": 0.6021505376344086, "grad_norm": 0.24191435789572643, "learning_rate": 4.441072331815883e-05, "loss": 0.3354, "num_tokens": 336841475.0, "step": 1764 }, { "epoch": 0.6024918928144735, "grad_norm": 0.22914224583819898, "learning_rate": 4.440440060698027e-05, "loss": 0.356, "num_tokens": 337021026.0, "step": 1765 }, { "epoch": 0.6028332479945383, "grad_norm": 0.23132888844011804, "learning_rate": 4.439807789580172e-05, "loss": 0.3807, "num_tokens": 337224337.0, "step": 1766 }, { "epoch": 0.6031746031746031, "grad_norm": 0.27062525605500243, "learning_rate": 4.439175518462317e-05, "loss": 0.4033, "num_tokens": 337414840.0, "step": 1767 }, { "epoch": 0.603515958354668, "grad_norm": 0.24581522143364934, "learning_rate": 4.438543247344462e-05, "loss": 0.3887, "num_tokens": 337606837.0, "step": 1768 }, { "epoch": 0.6038573135347329, "grad_norm": 0.23638899288962717, "learning_rate": 4.4379109762266065e-05, "loss": 0.3711, "num_tokens": 337772748.0, "step": 1769 }, { "epoch": 0.6041986687147978, "grad_norm": 0.2355846978684492, "learning_rate": 4.437278705108751e-05, "loss": 0.3767, "num_tokens": 337940549.0, "step": 1770 }, { "epoch": 0.6045400238948626, "grad_norm": 0.24679530381098505, "learning_rate": 4.436646433990895e-05, "loss": 0.3924, "num_tokens": 338149612.0, "step": 1771 }, { "epoch": 0.6048813790749274, "grad_norm": 0.2544881629037288, "learning_rate": 4.43601416287304e-05, "loss": 0.4187, "num_tokens": 338377454.0, "step": 1772 }, { "epoch": 0.6052227342549923, "grad_norm": 0.22903355761701227, "learning_rate": 4.4353818917551847e-05, "loss": 0.3825, "num_tokens": 338576551.0, "step": 1773 }, { "epoch": 0.6055640894350571, "grad_norm": 0.21796494489796386, "learning_rate": 4.4347496206373294e-05, "loss": 0.3458, "num_tokens": 338775791.0, "step": 1774 }, { "epoch": 0.6059054446151221, "grad_norm": 0.24201086627449372, "learning_rate": 4.434117349519474e-05, "loss": 0.3911, "num_tokens": 339006848.0, "step": 1775 }, { "epoch": 0.6062467997951869, "grad_norm": 0.2613263672204389, "learning_rate": 4.433485078401619e-05, "loss": 0.3726, "num_tokens": 339162831.0, "step": 1776 }, { "epoch": 0.6065881549752518, "grad_norm": 0.24956222523545343, "learning_rate": 4.4328528072837634e-05, "loss": 0.3607, "num_tokens": 339305774.0, "step": 1777 }, { "epoch": 0.6069295101553166, "grad_norm": 0.2307089508354978, "learning_rate": 4.432220536165908e-05, "loss": 0.3821, "num_tokens": 339492204.0, "step": 1778 }, { "epoch": 0.6072708653353814, "grad_norm": 0.2354901360854662, "learning_rate": 4.431588265048053e-05, "loss": 0.3717, "num_tokens": 339704653.0, "step": 1779 }, { "epoch": 0.6076122205154463, "grad_norm": 0.22722629431997451, "learning_rate": 4.4309559939301975e-05, "loss": 0.3804, "num_tokens": 339912723.0, "step": 1780 }, { "epoch": 0.6079535756955112, "grad_norm": 0.262259986298772, "learning_rate": 4.430323722812342e-05, "loss": 0.4002, "num_tokens": 340143930.0, "step": 1781 }, { "epoch": 0.6082949308755761, "grad_norm": 0.2616041984605036, "learning_rate": 4.429691451694487e-05, "loss": 0.3736, "num_tokens": 340303484.0, "step": 1782 }, { "epoch": 0.6086362860556409, "grad_norm": 0.24911684133257558, "learning_rate": 4.429059180576631e-05, "loss": 0.3764, "num_tokens": 340454673.0, "step": 1783 }, { "epoch": 0.6089776412357057, "grad_norm": 0.25898434523810804, "learning_rate": 4.428426909458776e-05, "loss": 0.3936, "num_tokens": 340666321.0, "step": 1784 }, { "epoch": 0.6093189964157706, "grad_norm": 0.2454888251211202, "learning_rate": 4.427794638340921e-05, "loss": 0.4048, "num_tokens": 340846640.0, "step": 1785 }, { "epoch": 0.6096603515958354, "grad_norm": 0.28478184149311786, "learning_rate": 4.427162367223066e-05, "loss": 0.4097, "num_tokens": 341010166.0, "step": 1786 }, { "epoch": 0.6100017067759004, "grad_norm": 0.23243298983845176, "learning_rate": 4.4265300961052104e-05, "loss": 0.3543, "num_tokens": 341204843.0, "step": 1787 }, { "epoch": 0.6103430619559652, "grad_norm": 0.24664412262451607, "learning_rate": 4.425897824987355e-05, "loss": 0.379, "num_tokens": 341401428.0, "step": 1788 }, { "epoch": 0.61068441713603, "grad_norm": 0.2578886767526291, "learning_rate": 4.425265553869499e-05, "loss": 0.3541, "num_tokens": 341562887.0, "step": 1789 }, { "epoch": 0.6110257723160949, "grad_norm": 0.23978796783604375, "learning_rate": 4.424633282751644e-05, "loss": 0.3655, "num_tokens": 341740434.0, "step": 1790 }, { "epoch": 0.6113671274961597, "grad_norm": 0.2549034305446087, "learning_rate": 4.4240010116337885e-05, "loss": 0.35, "num_tokens": 341932390.0, "step": 1791 }, { "epoch": 0.6117084826762246, "grad_norm": 0.2477235399368931, "learning_rate": 4.423368740515933e-05, "loss": 0.3981, "num_tokens": 342125418.0, "step": 1792 }, { "epoch": 0.6120498378562894, "grad_norm": 0.26827501311430924, "learning_rate": 4.4227364693980786e-05, "loss": 0.3544, "num_tokens": 342300706.0, "step": 1793 }, { "epoch": 0.6123911930363544, "grad_norm": 0.3191801719261224, "learning_rate": 4.422104198280223e-05, "loss": 0.3619, "num_tokens": 342515504.0, "step": 1794 }, { "epoch": 0.6127325482164192, "grad_norm": 0.2181685942082269, "learning_rate": 4.4214719271623673e-05, "loss": 0.3788, "num_tokens": 342741724.0, "step": 1795 }, { "epoch": 0.613073903396484, "grad_norm": 0.22584360926665692, "learning_rate": 4.420839656044512e-05, "loss": 0.3614, "num_tokens": 342943665.0, "step": 1796 }, { "epoch": 0.6134152585765489, "grad_norm": 0.25300357220049263, "learning_rate": 4.420207384926657e-05, "loss": 0.3805, "num_tokens": 343165392.0, "step": 1797 }, { "epoch": 0.6137566137566137, "grad_norm": 0.2455827595811808, "learning_rate": 4.4195751138088014e-05, "loss": 0.3737, "num_tokens": 343351104.0, "step": 1798 }, { "epoch": 0.6140979689366786, "grad_norm": 0.21854361939952802, "learning_rate": 4.418942842690946e-05, "loss": 0.3679, "num_tokens": 343569677.0, "step": 1799 }, { "epoch": 0.6144393241167435, "grad_norm": 0.2531358371810618, "learning_rate": 4.418310571573091e-05, "loss": 0.4121, "num_tokens": 343792685.0, "step": 1800 }, { "epoch": 0.6147806792968084, "grad_norm": 0.24877664658093354, "learning_rate": 4.4176783004552355e-05, "loss": 0.3761, "num_tokens": 343989870.0, "step": 1801 }, { "epoch": 0.6151220344768732, "grad_norm": 0.2910607975157193, "learning_rate": 4.41704602933738e-05, "loss": 0.3627, "num_tokens": 344133223.0, "step": 1802 }, { "epoch": 0.615463389656938, "grad_norm": 0.23597879078447584, "learning_rate": 4.416413758219525e-05, "loss": 0.377, "num_tokens": 344324554.0, "step": 1803 }, { "epoch": 0.6158047448370029, "grad_norm": 0.2382036550779828, "learning_rate": 4.4157814871016696e-05, "loss": 0.3865, "num_tokens": 344552426.0, "step": 1804 }, { "epoch": 0.6161461000170677, "grad_norm": 0.21927911409497206, "learning_rate": 4.415149215983814e-05, "loss": 0.3867, "num_tokens": 344808425.0, "step": 1805 }, { "epoch": 0.6164874551971327, "grad_norm": 0.26409060005544177, "learning_rate": 4.4145169448659583e-05, "loss": 0.4166, "num_tokens": 344983679.0, "step": 1806 }, { "epoch": 0.6168288103771975, "grad_norm": 0.2569748745294454, "learning_rate": 4.413884673748103e-05, "loss": 0.4109, "num_tokens": 345195349.0, "step": 1807 }, { "epoch": 0.6171701655572623, "grad_norm": 0.26651617810489947, "learning_rate": 4.413252402630248e-05, "loss": 0.3767, "num_tokens": 345385686.0, "step": 1808 }, { "epoch": 0.6175115207373272, "grad_norm": 0.22613815787156763, "learning_rate": 4.4126201315123924e-05, "loss": 0.3653, "num_tokens": 345573828.0, "step": 1809 }, { "epoch": 0.617852875917392, "grad_norm": 0.24937306019898442, "learning_rate": 4.411987860394538e-05, "loss": 0.3838, "num_tokens": 345757717.0, "step": 1810 }, { "epoch": 0.6181942310974569, "grad_norm": 0.26228432838247767, "learning_rate": 4.4113555892766825e-05, "loss": 0.3967, "num_tokens": 345929047.0, "step": 1811 }, { "epoch": 0.6185355862775218, "grad_norm": 0.24078533488833406, "learning_rate": 4.4107233181588265e-05, "loss": 0.4058, "num_tokens": 346133419.0, "step": 1812 }, { "epoch": 0.6188769414575866, "grad_norm": 0.23495974462555894, "learning_rate": 4.410091047040971e-05, "loss": 0.3872, "num_tokens": 346334086.0, "step": 1813 }, { "epoch": 0.6192182966376515, "grad_norm": 0.2570473835846614, "learning_rate": 4.409458775923116e-05, "loss": 0.3703, "num_tokens": 346517367.0, "step": 1814 }, { "epoch": 0.6195596518177163, "grad_norm": 0.2804797290329269, "learning_rate": 4.4088265048052606e-05, "loss": 0.3431, "num_tokens": 346710908.0, "step": 1815 }, { "epoch": 0.6199010069977812, "grad_norm": 0.22398244079118526, "learning_rate": 4.408194233687405e-05, "loss": 0.3631, "num_tokens": 346891517.0, "step": 1816 }, { "epoch": 0.620242362177846, "grad_norm": 0.24061959534092064, "learning_rate": 4.40756196256955e-05, "loss": 0.407, "num_tokens": 347064150.0, "step": 1817 }, { "epoch": 0.620583717357911, "grad_norm": 0.24739091036293293, "learning_rate": 4.406929691451695e-05, "loss": 0.3871, "num_tokens": 347259493.0, "step": 1818 }, { "epoch": 0.6209250725379758, "grad_norm": 0.22591376876419508, "learning_rate": 4.4062974203338394e-05, "loss": 0.3588, "num_tokens": 347439569.0, "step": 1819 }, { "epoch": 0.6212664277180406, "grad_norm": 0.2519340390170991, "learning_rate": 4.405665149215984e-05, "loss": 0.3734, "num_tokens": 347628788.0, "step": 1820 }, { "epoch": 0.6216077828981055, "grad_norm": 0.25824005000860406, "learning_rate": 4.405032878098129e-05, "loss": 0.4096, "num_tokens": 347824535.0, "step": 1821 }, { "epoch": 0.6219491380781703, "grad_norm": 0.2423539905538408, "learning_rate": 4.4044006069802735e-05, "loss": 0.385, "num_tokens": 348011051.0, "step": 1822 }, { "epoch": 0.6222904932582352, "grad_norm": 0.2404243462521954, "learning_rate": 4.403768335862418e-05, "loss": 0.3759, "num_tokens": 348222782.0, "step": 1823 }, { "epoch": 0.6226318484383001, "grad_norm": 0.25551307800556794, "learning_rate": 4.403136064744562e-05, "loss": 0.3907, "num_tokens": 348411343.0, "step": 1824 }, { "epoch": 0.622973203618365, "grad_norm": 0.30723255912273456, "learning_rate": 4.402503793626707e-05, "loss": 0.3825, "num_tokens": 348552305.0, "step": 1825 }, { "epoch": 0.6233145587984298, "grad_norm": 0.23828254115903355, "learning_rate": 4.4018715225088516e-05, "loss": 0.3614, "num_tokens": 348740228.0, "step": 1826 }, { "epoch": 0.6236559139784946, "grad_norm": 0.25320353995004247, "learning_rate": 4.401239251390997e-05, "loss": 0.3879, "num_tokens": 348923646.0, "step": 1827 }, { "epoch": 0.6239972691585595, "grad_norm": 0.23342848166200356, "learning_rate": 4.400606980273142e-05, "loss": 0.3466, "num_tokens": 349089015.0, "step": 1828 }, { "epoch": 0.6243386243386243, "grad_norm": 0.2373617436140553, "learning_rate": 4.3999747091552864e-05, "loss": 0.3522, "num_tokens": 349281372.0, "step": 1829 }, { "epoch": 0.6246799795186891, "grad_norm": 0.25773824580525084, "learning_rate": 4.3993424380374304e-05, "loss": 0.4125, "num_tokens": 349474884.0, "step": 1830 }, { "epoch": 0.6250213346987541, "grad_norm": 0.24990877039186307, "learning_rate": 4.398710166919575e-05, "loss": 0.3622, "num_tokens": 349620258.0, "step": 1831 }, { "epoch": 0.6253626898788189, "grad_norm": 0.225910346960705, "learning_rate": 4.39807789580172e-05, "loss": 0.3573, "num_tokens": 349822891.0, "step": 1832 }, { "epoch": 0.6257040450588838, "grad_norm": 0.23520379817575782, "learning_rate": 4.3974456246838645e-05, "loss": 0.3964, "num_tokens": 350034076.0, "step": 1833 }, { "epoch": 0.6260454002389486, "grad_norm": 0.21794211130255497, "learning_rate": 4.396813353566009e-05, "loss": 0.3831, "num_tokens": 350262532.0, "step": 1834 }, { "epoch": 0.6263867554190135, "grad_norm": 0.23596533349481416, "learning_rate": 4.396181082448154e-05, "loss": 0.385, "num_tokens": 350448438.0, "step": 1835 }, { "epoch": 0.6267281105990783, "grad_norm": 0.24231888939437088, "learning_rate": 4.3955488113302986e-05, "loss": 0.3858, "num_tokens": 350631164.0, "step": 1836 }, { "epoch": 0.6270694657791432, "grad_norm": 0.23293778262765336, "learning_rate": 4.394916540212443e-05, "loss": 0.3525, "num_tokens": 350813166.0, "step": 1837 }, { "epoch": 0.6274108209592081, "grad_norm": 0.21774243967747792, "learning_rate": 4.394284269094588e-05, "loss": 0.3582, "num_tokens": 350995789.0, "step": 1838 }, { "epoch": 0.6277521761392729, "grad_norm": 0.2281419131583684, "learning_rate": 4.393651997976733e-05, "loss": 0.3833, "num_tokens": 351204014.0, "step": 1839 }, { "epoch": 0.6280935313193378, "grad_norm": 0.21324363084467815, "learning_rate": 4.3930197268588774e-05, "loss": 0.3132, "num_tokens": 351356547.0, "step": 1840 }, { "epoch": 0.6284348864994026, "grad_norm": 0.22758413368824137, "learning_rate": 4.392387455741022e-05, "loss": 0.367, "num_tokens": 351529172.0, "step": 1841 }, { "epoch": 0.6287762416794674, "grad_norm": 0.21418975001163984, "learning_rate": 4.391755184623166e-05, "loss": 0.3621, "num_tokens": 351733465.0, "step": 1842 }, { "epoch": 0.6291175968595324, "grad_norm": 0.23271286658200133, "learning_rate": 4.391122913505311e-05, "loss": 0.349, "num_tokens": 351920302.0, "step": 1843 }, { "epoch": 0.6294589520395972, "grad_norm": 0.22575191580764514, "learning_rate": 4.390490642387456e-05, "loss": 0.4068, "num_tokens": 352151300.0, "step": 1844 }, { "epoch": 0.6298003072196621, "grad_norm": 0.25858438148657753, "learning_rate": 4.389858371269601e-05, "loss": 0.3692, "num_tokens": 352313782.0, "step": 1845 }, { "epoch": 0.6301416623997269, "grad_norm": 0.23145912016417833, "learning_rate": 4.3892261001517456e-05, "loss": 0.3708, "num_tokens": 352497893.0, "step": 1846 }, { "epoch": 0.6304830175797917, "grad_norm": 0.2533261132011824, "learning_rate": 4.38859382903389e-05, "loss": 0.3663, "num_tokens": 352694237.0, "step": 1847 }, { "epoch": 0.6308243727598566, "grad_norm": 0.25548905912562386, "learning_rate": 4.387961557916034e-05, "loss": 0.4022, "num_tokens": 352898628.0, "step": 1848 }, { "epoch": 0.6311657279399215, "grad_norm": 0.27482138188437494, "learning_rate": 4.387329286798179e-05, "loss": 0.3973, "num_tokens": 353076005.0, "step": 1849 }, { "epoch": 0.6315070831199864, "grad_norm": 0.21887462267802693, "learning_rate": 4.386697015680324e-05, "loss": 0.3415, "num_tokens": 353272469.0, "step": 1850 }, { "epoch": 0.6318484383000512, "grad_norm": 0.25036902589529947, "learning_rate": 4.3860647445624684e-05, "loss": 0.3944, "num_tokens": 353449019.0, "step": 1851 }, { "epoch": 0.6321897934801161, "grad_norm": 0.26679511369621, "learning_rate": 4.385432473444613e-05, "loss": 0.3761, "num_tokens": 353640324.0, "step": 1852 }, { "epoch": 0.6325311486601809, "grad_norm": 0.22048099503593702, "learning_rate": 4.384800202326758e-05, "loss": 0.3539, "num_tokens": 353843407.0, "step": 1853 }, { "epoch": 0.6328725038402457, "grad_norm": 0.2239974655144779, "learning_rate": 4.3841679312089025e-05, "loss": 0.37, "num_tokens": 354038278.0, "step": 1854 }, { "epoch": 0.6332138590203107, "grad_norm": 0.2373681706398939, "learning_rate": 4.383535660091047e-05, "loss": 0.3512, "num_tokens": 354227915.0, "step": 1855 }, { "epoch": 0.6335552142003755, "grad_norm": 0.2451025338709667, "learning_rate": 4.382903388973192e-05, "loss": 0.3731, "num_tokens": 354412613.0, "step": 1856 }, { "epoch": 0.6338965693804404, "grad_norm": 0.2360134639921329, "learning_rate": 4.3822711178553366e-05, "loss": 0.3728, "num_tokens": 354585841.0, "step": 1857 }, { "epoch": 0.6342379245605052, "grad_norm": 0.23897783647668763, "learning_rate": 4.381638846737481e-05, "loss": 0.3651, "num_tokens": 354762651.0, "step": 1858 }, { "epoch": 0.63457927974057, "grad_norm": 0.2296279831095206, "learning_rate": 4.381006575619626e-05, "loss": 0.3908, "num_tokens": 354990327.0, "step": 1859 }, { "epoch": 0.6349206349206349, "grad_norm": 0.22831011887607838, "learning_rate": 4.38037430450177e-05, "loss": 0.3462, "num_tokens": 355171242.0, "step": 1860 }, { "epoch": 0.6352619901006997, "grad_norm": 0.22990994950363625, "learning_rate": 4.3797420333839154e-05, "loss": 0.3697, "num_tokens": 355374957.0, "step": 1861 }, { "epoch": 0.6356033452807647, "grad_norm": 0.21128433385683407, "learning_rate": 4.37910976226606e-05, "loss": 0.3486, "num_tokens": 355596121.0, "step": 1862 }, { "epoch": 0.6359447004608295, "grad_norm": 0.23784641385832933, "learning_rate": 4.378477491148205e-05, "loss": 0.4026, "num_tokens": 355790076.0, "step": 1863 }, { "epoch": 0.6362860556408944, "grad_norm": 0.22718958035088407, "learning_rate": 4.3778452200303495e-05, "loss": 0.3486, "num_tokens": 355973877.0, "step": 1864 }, { "epoch": 0.6366274108209592, "grad_norm": 0.2235729055116926, "learning_rate": 4.377212948912494e-05, "loss": 0.3813, "num_tokens": 356183553.0, "step": 1865 }, { "epoch": 0.636968766001024, "grad_norm": 0.25495065118159915, "learning_rate": 4.376580677794638e-05, "loss": 0.4042, "num_tokens": 356347877.0, "step": 1866 }, { "epoch": 0.6373101211810889, "grad_norm": 0.2776689440161567, "learning_rate": 4.375948406676783e-05, "loss": 0.3842, "num_tokens": 356478883.0, "step": 1867 }, { "epoch": 0.6376514763611538, "grad_norm": 0.2330004980342953, "learning_rate": 4.3753161355589276e-05, "loss": 0.3599, "num_tokens": 356652283.0, "step": 1868 }, { "epoch": 0.6379928315412187, "grad_norm": 0.24586834743275923, "learning_rate": 4.374683864441072e-05, "loss": 0.4168, "num_tokens": 356864212.0, "step": 1869 }, { "epoch": 0.6383341867212835, "grad_norm": 0.2535942763966028, "learning_rate": 4.374051593323218e-05, "loss": 0.4336, "num_tokens": 357081622.0, "step": 1870 }, { "epoch": 0.6386755419013483, "grad_norm": 0.24174961635283324, "learning_rate": 4.3734193222053624e-05, "loss": 0.3695, "num_tokens": 357261902.0, "step": 1871 }, { "epoch": 0.6390168970814132, "grad_norm": 0.22898609032085113, "learning_rate": 4.3727870510875064e-05, "loss": 0.3759, "num_tokens": 357452605.0, "step": 1872 }, { "epoch": 0.639358252261478, "grad_norm": 0.23526896719754536, "learning_rate": 4.372154779969651e-05, "loss": 0.3732, "num_tokens": 357635032.0, "step": 1873 }, { "epoch": 0.639699607441543, "grad_norm": 0.24168362844199845, "learning_rate": 4.371522508851796e-05, "loss": 0.3711, "num_tokens": 357803345.0, "step": 1874 }, { "epoch": 0.6400409626216078, "grad_norm": 0.23428253942848754, "learning_rate": 4.3708902377339405e-05, "loss": 0.4075, "num_tokens": 358024082.0, "step": 1875 }, { "epoch": 0.6403823178016727, "grad_norm": 0.25296114293811367, "learning_rate": 4.370257966616085e-05, "loss": 0.4016, "num_tokens": 358204511.0, "step": 1876 }, { "epoch": 0.6407236729817375, "grad_norm": 0.24572225238530485, "learning_rate": 4.36962569549823e-05, "loss": 0.3663, "num_tokens": 358370999.0, "step": 1877 }, { "epoch": 0.6410650281618023, "grad_norm": 0.2423905110616859, "learning_rate": 4.3689934243803746e-05, "loss": 0.3901, "num_tokens": 358557743.0, "step": 1878 }, { "epoch": 0.6414063833418672, "grad_norm": 0.23593118281839254, "learning_rate": 4.368361153262519e-05, "loss": 0.3614, "num_tokens": 358737457.0, "step": 1879 }, { "epoch": 0.6417477385219321, "grad_norm": 0.2746952272446643, "learning_rate": 4.367728882144664e-05, "loss": 0.4069, "num_tokens": 358968256.0, "step": 1880 }, { "epoch": 0.642089093701997, "grad_norm": 0.2106173354382864, "learning_rate": 4.367096611026809e-05, "loss": 0.3565, "num_tokens": 359164776.0, "step": 1881 }, { "epoch": 0.6424304488820618, "grad_norm": 0.21240888061866817, "learning_rate": 4.3664643399089534e-05, "loss": 0.3516, "num_tokens": 359375898.0, "step": 1882 }, { "epoch": 0.6427718040621266, "grad_norm": 0.23603000203737798, "learning_rate": 4.365832068791098e-05, "loss": 0.3573, "num_tokens": 359559422.0, "step": 1883 }, { "epoch": 0.6431131592421915, "grad_norm": 0.23273922966467028, "learning_rate": 4.365199797673242e-05, "loss": 0.3581, "num_tokens": 359731126.0, "step": 1884 }, { "epoch": 0.6434545144222563, "grad_norm": 0.21274280845414115, "learning_rate": 4.364567526555387e-05, "loss": 0.3579, "num_tokens": 359918370.0, "step": 1885 }, { "epoch": 0.6437958696023213, "grad_norm": 0.23091383763257003, "learning_rate": 4.3639352554375315e-05, "loss": 0.3931, "num_tokens": 360130120.0, "step": 1886 }, { "epoch": 0.6441372247823861, "grad_norm": 0.23886112828738573, "learning_rate": 4.363302984319677e-05, "loss": 0.3626, "num_tokens": 360325237.0, "step": 1887 }, { "epoch": 0.644478579962451, "grad_norm": 0.24878806480758184, "learning_rate": 4.3626707132018216e-05, "loss": 0.3778, "num_tokens": 360502185.0, "step": 1888 }, { "epoch": 0.6448199351425158, "grad_norm": 0.250761870188453, "learning_rate": 4.362038442083966e-05, "loss": 0.3833, "num_tokens": 360672422.0, "step": 1889 }, { "epoch": 0.6451612903225806, "grad_norm": 0.24664058324631039, "learning_rate": 4.36140617096611e-05, "loss": 0.3781, "num_tokens": 360854075.0, "step": 1890 }, { "epoch": 0.6455026455026455, "grad_norm": 0.2297583867411252, "learning_rate": 4.360773899848255e-05, "loss": 0.3712, "num_tokens": 361037055.0, "step": 1891 }, { "epoch": 0.6458440006827103, "grad_norm": 0.23872138893006578, "learning_rate": 4.3601416287304e-05, "loss": 0.3503, "num_tokens": 361189729.0, "step": 1892 }, { "epoch": 0.6461853558627753, "grad_norm": 0.2219860799409807, "learning_rate": 4.3595093576125444e-05, "loss": 0.3782, "num_tokens": 361395453.0, "step": 1893 }, { "epoch": 0.6465267110428401, "grad_norm": 0.24445854140788115, "learning_rate": 4.358877086494689e-05, "loss": 0.3827, "num_tokens": 361578057.0, "step": 1894 }, { "epoch": 0.6468680662229049, "grad_norm": 0.22084571559114363, "learning_rate": 4.358244815376834e-05, "loss": 0.3479, "num_tokens": 361761915.0, "step": 1895 }, { "epoch": 0.6472094214029698, "grad_norm": 0.2488187991729424, "learning_rate": 4.3576125442589785e-05, "loss": 0.4206, "num_tokens": 361963766.0, "step": 1896 }, { "epoch": 0.6475507765830346, "grad_norm": 0.22562123415371635, "learning_rate": 4.356980273141123e-05, "loss": 0.3553, "num_tokens": 362122935.0, "step": 1897 }, { "epoch": 0.6478921317630995, "grad_norm": 0.2394858818944602, "learning_rate": 4.356348002023268e-05, "loss": 0.3724, "num_tokens": 362304476.0, "step": 1898 }, { "epoch": 0.6482334869431644, "grad_norm": 0.22601702625420408, "learning_rate": 4.3557157309054126e-05, "loss": 0.3531, "num_tokens": 362498926.0, "step": 1899 }, { "epoch": 0.6485748421232292, "grad_norm": 0.2316436798178977, "learning_rate": 4.355083459787557e-05, "loss": 0.3882, "num_tokens": 362711609.0, "step": 1900 }, { "epoch": 0.6489161973032941, "grad_norm": 0.2629882278072927, "learning_rate": 4.354451188669702e-05, "loss": 0.3938, "num_tokens": 362863182.0, "step": 1901 }, { "epoch": 0.6492575524833589, "grad_norm": 0.2358549961655554, "learning_rate": 4.353818917551846e-05, "loss": 0.3617, "num_tokens": 363037761.0, "step": 1902 }, { "epoch": 0.6495989076634238, "grad_norm": 0.21987807430197748, "learning_rate": 4.353186646433991e-05, "loss": 0.3652, "num_tokens": 363247247.0, "step": 1903 }, { "epoch": 0.6499402628434886, "grad_norm": 0.24815719013236118, "learning_rate": 4.352554375316136e-05, "loss": 0.3923, "num_tokens": 363443894.0, "step": 1904 }, { "epoch": 0.6502816180235536, "grad_norm": 0.23918870086892713, "learning_rate": 4.351922104198281e-05, "loss": 0.3707, "num_tokens": 363630267.0, "step": 1905 }, { "epoch": 0.6506229732036184, "grad_norm": 0.22517153259066627, "learning_rate": 4.3512898330804255e-05, "loss": 0.3461, "num_tokens": 363818040.0, "step": 1906 }, { "epoch": 0.6509643283836832, "grad_norm": 0.25989712350586636, "learning_rate": 4.35065756196257e-05, "loss": 0.4065, "num_tokens": 364020524.0, "step": 1907 }, { "epoch": 0.6513056835637481, "grad_norm": 0.22139003749500907, "learning_rate": 4.350025290844714e-05, "loss": 0.368, "num_tokens": 364222638.0, "step": 1908 }, { "epoch": 0.6516470387438129, "grad_norm": 0.22435963744821946, "learning_rate": 4.349393019726859e-05, "loss": 0.3312, "num_tokens": 364393838.0, "step": 1909 }, { "epoch": 0.6519883939238778, "grad_norm": 0.2619952783505076, "learning_rate": 4.3487607486090036e-05, "loss": 0.3732, "num_tokens": 364542283.0, "step": 1910 }, { "epoch": 0.6523297491039427, "grad_norm": 0.233909034062778, "learning_rate": 4.348128477491148e-05, "loss": 0.3675, "num_tokens": 364737407.0, "step": 1911 }, { "epoch": 0.6526711042840075, "grad_norm": 0.24046050911670142, "learning_rate": 4.347496206373293e-05, "loss": 0.398, "num_tokens": 364929498.0, "step": 1912 }, { "epoch": 0.6530124594640724, "grad_norm": 0.23472475617633487, "learning_rate": 4.346863935255438e-05, "loss": 0.3591, "num_tokens": 365101561.0, "step": 1913 }, { "epoch": 0.6533538146441372, "grad_norm": 0.26005404945680727, "learning_rate": 4.3462316641375824e-05, "loss": 0.3655, "num_tokens": 365244505.0, "step": 1914 }, { "epoch": 0.6536951698242021, "grad_norm": 0.23936290024510695, "learning_rate": 4.345599393019727e-05, "loss": 0.3896, "num_tokens": 365455118.0, "step": 1915 }, { "epoch": 0.6540365250042669, "grad_norm": 0.3307058185063551, "learning_rate": 4.344967121901872e-05, "loss": 0.3487, "num_tokens": 365626398.0, "step": 1916 }, { "epoch": 0.6543778801843319, "grad_norm": 0.22842427290189268, "learning_rate": 4.3443348507840165e-05, "loss": 0.3588, "num_tokens": 365807854.0, "step": 1917 }, { "epoch": 0.6547192353643967, "grad_norm": 0.2357118505955382, "learning_rate": 4.343702579666161e-05, "loss": 0.3424, "num_tokens": 365992989.0, "step": 1918 }, { "epoch": 0.6550605905444615, "grad_norm": 0.2532241985257071, "learning_rate": 4.343070308548306e-05, "loss": 0.3682, "num_tokens": 366194474.0, "step": 1919 }, { "epoch": 0.6554019457245264, "grad_norm": 0.24560063270816118, "learning_rate": 4.34243803743045e-05, "loss": 0.3648, "num_tokens": 366389645.0, "step": 1920 }, { "epoch": 0.6557433009045912, "grad_norm": 0.23484282259767675, "learning_rate": 4.341805766312595e-05, "loss": 0.3767, "num_tokens": 366603924.0, "step": 1921 }, { "epoch": 0.656084656084656, "grad_norm": 0.23615218547363911, "learning_rate": 4.34117349519474e-05, "loss": 0.3882, "num_tokens": 366805907.0, "step": 1922 }, { "epoch": 0.6564260112647209, "grad_norm": 0.22016699519409308, "learning_rate": 4.3405412240768847e-05, "loss": 0.3623, "num_tokens": 367023177.0, "step": 1923 }, { "epoch": 0.6567673664447858, "grad_norm": 0.23626824335711014, "learning_rate": 4.3399089529590294e-05, "loss": 0.3738, "num_tokens": 367202870.0, "step": 1924 }, { "epoch": 0.6571087216248507, "grad_norm": 0.22934245063328745, "learning_rate": 4.339276681841174e-05, "loss": 0.3987, "num_tokens": 367420832.0, "step": 1925 }, { "epoch": 0.6574500768049155, "grad_norm": 0.23787175990556744, "learning_rate": 4.338644410723318e-05, "loss": 0.3718, "num_tokens": 367611165.0, "step": 1926 }, { "epoch": 0.6577914319849804, "grad_norm": 0.24096121497386597, "learning_rate": 4.338012139605463e-05, "loss": 0.3586, "num_tokens": 367776045.0, "step": 1927 }, { "epoch": 0.6581327871650452, "grad_norm": 0.24487503180542056, "learning_rate": 4.3373798684876075e-05, "loss": 0.4136, "num_tokens": 367971431.0, "step": 1928 }, { "epoch": 0.65847414234511, "grad_norm": 0.37355412126830234, "learning_rate": 4.336747597369752e-05, "loss": 0.4108, "num_tokens": 368163600.0, "step": 1929 }, { "epoch": 0.658815497525175, "grad_norm": 0.23673947208146118, "learning_rate": 4.336115326251897e-05, "loss": 0.3848, "num_tokens": 368356587.0, "step": 1930 }, { "epoch": 0.6591568527052398, "grad_norm": 0.226291575848087, "learning_rate": 4.335483055134042e-05, "loss": 0.3501, "num_tokens": 368534695.0, "step": 1931 }, { "epoch": 0.6594982078853047, "grad_norm": 0.24186321179209455, "learning_rate": 4.334850784016186e-05, "loss": 0.3612, "num_tokens": 368703895.0, "step": 1932 }, { "epoch": 0.6598395630653695, "grad_norm": 0.2511841456285775, "learning_rate": 4.334218512898331e-05, "loss": 0.4168, "num_tokens": 368881108.0, "step": 1933 }, { "epoch": 0.6601809182454343, "grad_norm": 0.24899730115306473, "learning_rate": 4.333586241780476e-05, "loss": 0.3923, "num_tokens": 369069948.0, "step": 1934 }, { "epoch": 0.6605222734254992, "grad_norm": 0.22779418013138772, "learning_rate": 4.3329539706626204e-05, "loss": 0.3567, "num_tokens": 369264423.0, "step": 1935 }, { "epoch": 0.6608636286055641, "grad_norm": 0.23462860557079285, "learning_rate": 4.332321699544765e-05, "loss": 0.3798, "num_tokens": 369462335.0, "step": 1936 }, { "epoch": 0.661204983785629, "grad_norm": 0.22355272525714975, "learning_rate": 4.331689428426909e-05, "loss": 0.3461, "num_tokens": 369669420.0, "step": 1937 }, { "epoch": 0.6615463389656938, "grad_norm": 0.2550097316101052, "learning_rate": 4.3310571573090545e-05, "loss": 0.3993, "num_tokens": 369845901.0, "step": 1938 }, { "epoch": 0.6618876941457587, "grad_norm": 0.22970166701985667, "learning_rate": 4.330424886191199e-05, "loss": 0.3875, "num_tokens": 370046267.0, "step": 1939 }, { "epoch": 0.6622290493258235, "grad_norm": 0.24268955191920705, "learning_rate": 4.329792615073344e-05, "loss": 0.3862, "num_tokens": 370239830.0, "step": 1940 }, { "epoch": 0.6625704045058883, "grad_norm": 0.21626717455525846, "learning_rate": 4.3291603439554885e-05, "loss": 0.3759, "num_tokens": 370434480.0, "step": 1941 }, { "epoch": 0.6629117596859533, "grad_norm": 0.39363673488756473, "learning_rate": 4.328528072837633e-05, "loss": 0.357, "num_tokens": 370588463.0, "step": 1942 }, { "epoch": 0.6632531148660181, "grad_norm": 0.3042031125180229, "learning_rate": 4.327895801719777e-05, "loss": 0.3595, "num_tokens": 370749210.0, "step": 1943 }, { "epoch": 0.663594470046083, "grad_norm": 0.2338431530825712, "learning_rate": 4.327263530601922e-05, "loss": 0.357, "num_tokens": 370917548.0, "step": 1944 }, { "epoch": 0.6639358252261478, "grad_norm": 0.23657235911420624, "learning_rate": 4.326631259484067e-05, "loss": 0.3465, "num_tokens": 371107469.0, "step": 1945 }, { "epoch": 0.6642771804062126, "grad_norm": 0.23148773833780223, "learning_rate": 4.3259989883662114e-05, "loss": 0.3693, "num_tokens": 371277570.0, "step": 1946 }, { "epoch": 0.6646185355862775, "grad_norm": 0.24304474785287017, "learning_rate": 4.325366717248357e-05, "loss": 0.389, "num_tokens": 371495652.0, "step": 1947 }, { "epoch": 0.6649598907663424, "grad_norm": 0.23321371286785905, "learning_rate": 4.3247344461305014e-05, "loss": 0.3509, "num_tokens": 371668910.0, "step": 1948 }, { "epoch": 0.6653012459464073, "grad_norm": 0.2350009858690826, "learning_rate": 4.3241021750126455e-05, "loss": 0.4055, "num_tokens": 371889817.0, "step": 1949 }, { "epoch": 0.6656426011264721, "grad_norm": 0.21513488712443526, "learning_rate": 4.32346990389479e-05, "loss": 0.3211, "num_tokens": 372082119.0, "step": 1950 }, { "epoch": 0.665983956306537, "grad_norm": 0.24474597973373488, "learning_rate": 4.322837632776935e-05, "loss": 0.3295, "num_tokens": 372252459.0, "step": 1951 }, { "epoch": 0.6663253114866018, "grad_norm": 0.24802213311399354, "learning_rate": 4.3222053616590796e-05, "loss": 0.4073, "num_tokens": 372454242.0, "step": 1952 }, { "epoch": 0.6666666666666666, "grad_norm": 0.22678944707958124, "learning_rate": 4.321573090541224e-05, "loss": 0.3975, "num_tokens": 372671487.0, "step": 1953 }, { "epoch": 0.6670080218467315, "grad_norm": 0.25088709424040057, "learning_rate": 4.320940819423369e-05, "loss": 0.4048, "num_tokens": 372885064.0, "step": 1954 }, { "epoch": 0.6673493770267964, "grad_norm": 0.22081151639069874, "learning_rate": 4.3203085483055136e-05, "loss": 0.3731, "num_tokens": 373104814.0, "step": 1955 }, { "epoch": 0.6676907322068613, "grad_norm": 0.2203401281525503, "learning_rate": 4.3196762771876583e-05, "loss": 0.3577, "num_tokens": 373291559.0, "step": 1956 }, { "epoch": 0.6680320873869261, "grad_norm": 0.2292879153999466, "learning_rate": 4.319044006069803e-05, "loss": 0.3575, "num_tokens": 373483194.0, "step": 1957 }, { "epoch": 0.6683734425669909, "grad_norm": 0.23681939862784404, "learning_rate": 4.318411734951948e-05, "loss": 0.3762, "num_tokens": 373683889.0, "step": 1958 }, { "epoch": 0.6687147977470558, "grad_norm": 0.27155150803647826, "learning_rate": 4.3177794638340924e-05, "loss": 0.3944, "num_tokens": 373848240.0, "step": 1959 }, { "epoch": 0.6690561529271206, "grad_norm": 0.22363675954917267, "learning_rate": 4.317147192716237e-05, "loss": 0.3824, "num_tokens": 374080177.0, "step": 1960 }, { "epoch": 0.6693975081071856, "grad_norm": 0.25028650943660685, "learning_rate": 4.316514921598381e-05, "loss": 0.3888, "num_tokens": 374260209.0, "step": 1961 }, { "epoch": 0.6697388632872504, "grad_norm": 0.2273782037308235, "learning_rate": 4.315882650480526e-05, "loss": 0.3496, "num_tokens": 374447035.0, "step": 1962 }, { "epoch": 0.6700802184673152, "grad_norm": 0.24384933054356045, "learning_rate": 4.3152503793626706e-05, "loss": 0.3538, "num_tokens": 374638916.0, "step": 1963 }, { "epoch": 0.6704215736473801, "grad_norm": 0.230844198160812, "learning_rate": 4.314618108244816e-05, "loss": 0.399, "num_tokens": 374865388.0, "step": 1964 }, { "epoch": 0.6707629288274449, "grad_norm": 0.2696029056218616, "learning_rate": 4.3139858371269606e-05, "loss": 0.3785, "num_tokens": 375025882.0, "step": 1965 }, { "epoch": 0.6711042840075098, "grad_norm": 0.21771702528837666, "learning_rate": 4.313353566009105e-05, "loss": 0.3403, "num_tokens": 375196867.0, "step": 1966 }, { "epoch": 0.6714456391875747, "grad_norm": 0.2545271586877868, "learning_rate": 4.3127212948912494e-05, "loss": 0.3561, "num_tokens": 375381348.0, "step": 1967 }, { "epoch": 0.6717869943676396, "grad_norm": 0.23863055570013442, "learning_rate": 4.312089023773394e-05, "loss": 0.3817, "num_tokens": 375563262.0, "step": 1968 }, { "epoch": 0.6721283495477044, "grad_norm": 0.24520924720299905, "learning_rate": 4.311456752655539e-05, "loss": 0.3602, "num_tokens": 375755456.0, "step": 1969 }, { "epoch": 0.6724697047277692, "grad_norm": 0.24992519905313224, "learning_rate": 4.3108244815376834e-05, "loss": 0.3503, "num_tokens": 375945273.0, "step": 1970 }, { "epoch": 0.6728110599078341, "grad_norm": 0.23465592051328088, "learning_rate": 4.310192210419828e-05, "loss": 0.3545, "num_tokens": 376158616.0, "step": 1971 }, { "epoch": 0.6731524150878989, "grad_norm": 0.24745249529808575, "learning_rate": 4.309559939301973e-05, "loss": 0.3649, "num_tokens": 376351652.0, "step": 1972 }, { "epoch": 0.6734937702679639, "grad_norm": 0.29869299521836523, "learning_rate": 4.3089276681841175e-05, "loss": 0.3666, "num_tokens": 376526876.0, "step": 1973 }, { "epoch": 0.6738351254480287, "grad_norm": 0.23275251875917385, "learning_rate": 4.308295397066262e-05, "loss": 0.3473, "num_tokens": 376733829.0, "step": 1974 }, { "epoch": 0.6741764806280935, "grad_norm": 0.23848803170402513, "learning_rate": 4.307663125948407e-05, "loss": 0.3716, "num_tokens": 376928384.0, "step": 1975 }, { "epoch": 0.6745178358081584, "grad_norm": 0.2251245523277536, "learning_rate": 4.3070308548305516e-05, "loss": 0.3311, "num_tokens": 377105080.0, "step": 1976 }, { "epoch": 0.6748591909882232, "grad_norm": 0.2631300398811243, "learning_rate": 4.306398583712696e-05, "loss": 0.3997, "num_tokens": 377273242.0, "step": 1977 }, { "epoch": 0.6752005461682881, "grad_norm": 0.22941519084114542, "learning_rate": 4.305766312594841e-05, "loss": 0.3983, "num_tokens": 377508472.0, "step": 1978 }, { "epoch": 0.675541901348353, "grad_norm": 0.224280812592458, "learning_rate": 4.305134041476985e-05, "loss": 0.3596, "num_tokens": 377680574.0, "step": 1979 }, { "epoch": 0.6758832565284179, "grad_norm": 0.2668878442443925, "learning_rate": 4.30450177035913e-05, "loss": 0.3936, "num_tokens": 377838356.0, "step": 1980 }, { "epoch": 0.6762246117084827, "grad_norm": 0.2361891019681567, "learning_rate": 4.303869499241275e-05, "loss": 0.3706, "num_tokens": 378014728.0, "step": 1981 }, { "epoch": 0.6765659668885475, "grad_norm": 0.2264242220797253, "learning_rate": 4.30323722812342e-05, "loss": 0.3467, "num_tokens": 378238119.0, "step": 1982 }, { "epoch": 0.6769073220686124, "grad_norm": 0.23326326615573895, "learning_rate": 4.3026049570055645e-05, "loss": 0.3858, "num_tokens": 378446898.0, "step": 1983 }, { "epoch": 0.6772486772486772, "grad_norm": 0.2396495168458108, "learning_rate": 4.301972685887709e-05, "loss": 0.3665, "num_tokens": 378613653.0, "step": 1984 }, { "epoch": 0.6775900324287422, "grad_norm": 0.21997909805064875, "learning_rate": 4.301340414769853e-05, "loss": 0.3428, "num_tokens": 378773187.0, "step": 1985 }, { "epoch": 0.677931387608807, "grad_norm": 0.24013850512664872, "learning_rate": 4.300708143651998e-05, "loss": 0.3515, "num_tokens": 378959920.0, "step": 1986 }, { "epoch": 0.6782727427888718, "grad_norm": 0.22422195923281055, "learning_rate": 4.3000758725341426e-05, "loss": 0.3472, "num_tokens": 379169757.0, "step": 1987 }, { "epoch": 0.6786140979689367, "grad_norm": 0.2075603698221459, "learning_rate": 4.299443601416287e-05, "loss": 0.3339, "num_tokens": 379354927.0, "step": 1988 }, { "epoch": 0.6789554531490015, "grad_norm": 0.35415231846912804, "learning_rate": 4.298811330298432e-05, "loss": 0.3453, "num_tokens": 379561376.0, "step": 1989 }, { "epoch": 0.6792968083290664, "grad_norm": 0.23899377727895693, "learning_rate": 4.298179059180577e-05, "loss": 0.4054, "num_tokens": 379755336.0, "step": 1990 }, { "epoch": 0.6796381635091312, "grad_norm": 0.2442107016451042, "learning_rate": 4.2975467880627214e-05, "loss": 0.3855, "num_tokens": 379942807.0, "step": 1991 }, { "epoch": 0.6799795186891961, "grad_norm": 0.23665588866109777, "learning_rate": 4.296914516944866e-05, "loss": 0.3727, "num_tokens": 380106017.0, "step": 1992 }, { "epoch": 0.680320873869261, "grad_norm": 0.2681541404942557, "learning_rate": 4.296282245827011e-05, "loss": 0.4097, "num_tokens": 380308566.0, "step": 1993 }, { "epoch": 0.6806622290493258, "grad_norm": 0.26638100799023356, "learning_rate": 4.2956499747091555e-05, "loss": 0.4087, "num_tokens": 380465870.0, "step": 1994 }, { "epoch": 0.6810035842293907, "grad_norm": 0.25978618819540855, "learning_rate": 4.2950177035913e-05, "loss": 0.3292, "num_tokens": 380657264.0, "step": 1995 }, { "epoch": 0.6813449394094555, "grad_norm": 0.2577188073896876, "learning_rate": 4.294385432473445e-05, "loss": 0.4248, "num_tokens": 380872125.0, "step": 1996 }, { "epoch": 0.6816862945895203, "grad_norm": 0.254383713944771, "learning_rate": 4.293753161355589e-05, "loss": 0.3687, "num_tokens": 381029884.0, "step": 1997 }, { "epoch": 0.6820276497695853, "grad_norm": 0.22352962326470002, "learning_rate": 4.293120890237734e-05, "loss": 0.3349, "num_tokens": 381204807.0, "step": 1998 }, { "epoch": 0.6823690049496501, "grad_norm": 0.22489396087743332, "learning_rate": 4.292488619119879e-05, "loss": 0.3769, "num_tokens": 381412162.0, "step": 1999 }, { "epoch": 0.682710360129715, "grad_norm": 0.23543711533871128, "learning_rate": 4.291856348002024e-05, "loss": 0.3468, "num_tokens": 381610099.0, "step": 2000 }, { "epoch": 0.6830517153097798, "grad_norm": 0.2596021082769746, "learning_rate": 4.2912240768841684e-05, "loss": 0.4223, "num_tokens": 381872616.0, "step": 2001 }, { "epoch": 0.6833930704898447, "grad_norm": 0.2690793369509014, "learning_rate": 4.290591805766313e-05, "loss": 0.3822, "num_tokens": 382051490.0, "step": 2002 }, { "epoch": 0.6837344256699095, "grad_norm": 0.22983780163168144, "learning_rate": 4.289959534648457e-05, "loss": 0.3789, "num_tokens": 382245250.0, "step": 2003 }, { "epoch": 0.6840757808499744, "grad_norm": 0.2252865481834095, "learning_rate": 4.289327263530602e-05, "loss": 0.357, "num_tokens": 382420064.0, "step": 2004 }, { "epoch": 0.6844171360300393, "grad_norm": 0.30924234851088955, "learning_rate": 4.2886949924127465e-05, "loss": 0.4002, "num_tokens": 382623713.0, "step": 2005 }, { "epoch": 0.6847584912101041, "grad_norm": 0.23655820734271796, "learning_rate": 4.288062721294891e-05, "loss": 0.3917, "num_tokens": 382805560.0, "step": 2006 }, { "epoch": 0.685099846390169, "grad_norm": 0.23137220667482156, "learning_rate": 4.287430450177036e-05, "loss": 0.3609, "num_tokens": 382997921.0, "step": 2007 }, { "epoch": 0.6854412015702338, "grad_norm": 0.24564696705543598, "learning_rate": 4.286798179059181e-05, "loss": 0.3716, "num_tokens": 383163240.0, "step": 2008 }, { "epoch": 0.6857825567502986, "grad_norm": 0.3066170476182562, "learning_rate": 4.286165907941325e-05, "loss": 0.3767, "num_tokens": 383330529.0, "step": 2009 }, { "epoch": 0.6861239119303636, "grad_norm": 0.2424386153840973, "learning_rate": 4.28553363682347e-05, "loss": 0.3827, "num_tokens": 383518425.0, "step": 2010 }, { "epoch": 0.6864652671104284, "grad_norm": 0.22119727151607813, "learning_rate": 4.284901365705615e-05, "loss": 0.3458, "num_tokens": 383702298.0, "step": 2011 }, { "epoch": 0.6868066222904933, "grad_norm": 0.23137875173769365, "learning_rate": 4.2842690945877594e-05, "loss": 0.3529, "num_tokens": 383872645.0, "step": 2012 }, { "epoch": 0.6871479774705581, "grad_norm": 0.25196682408612775, "learning_rate": 4.283636823469904e-05, "loss": 0.4052, "num_tokens": 384056268.0, "step": 2013 }, { "epoch": 0.687489332650623, "grad_norm": 0.2190109561121634, "learning_rate": 4.283004552352049e-05, "loss": 0.3471, "num_tokens": 384253835.0, "step": 2014 }, { "epoch": 0.6878306878306878, "grad_norm": 0.2228806308325679, "learning_rate": 4.2823722812341935e-05, "loss": 0.344, "num_tokens": 384440443.0, "step": 2015 }, { "epoch": 0.6881720430107527, "grad_norm": 0.21017771660440687, "learning_rate": 4.281740010116338e-05, "loss": 0.3658, "num_tokens": 384671701.0, "step": 2016 }, { "epoch": 0.6885133981908176, "grad_norm": 0.2572682095757346, "learning_rate": 4.281107738998483e-05, "loss": 0.3644, "num_tokens": 384825253.0, "step": 2017 }, { "epoch": 0.6888547533708824, "grad_norm": 0.24180334379485566, "learning_rate": 4.2804754678806276e-05, "loss": 0.3643, "num_tokens": 384985743.0, "step": 2018 }, { "epoch": 0.6891961085509473, "grad_norm": 0.2276299272155312, "learning_rate": 4.279843196762772e-05, "loss": 0.3665, "num_tokens": 385183313.0, "step": 2019 }, { "epoch": 0.6895374637310121, "grad_norm": 0.2562997858220187, "learning_rate": 4.279210925644917e-05, "loss": 0.3722, "num_tokens": 385338800.0, "step": 2020 }, { "epoch": 0.6898788189110769, "grad_norm": 0.24394283995167293, "learning_rate": 4.278578654527061e-05, "loss": 0.351, "num_tokens": 385495173.0, "step": 2021 }, { "epoch": 0.6902201740911418, "grad_norm": 0.22709206956118272, "learning_rate": 4.277946383409206e-05, "loss": 0.3814, "num_tokens": 385711469.0, "step": 2022 }, { "epoch": 0.6905615292712067, "grad_norm": 0.21856076403401975, "learning_rate": 4.2773141122913504e-05, "loss": 0.3828, "num_tokens": 385920263.0, "step": 2023 }, { "epoch": 0.6909028844512716, "grad_norm": 0.23012423051770226, "learning_rate": 4.276681841173496e-05, "loss": 0.3826, "num_tokens": 386117081.0, "step": 2024 }, { "epoch": 0.6912442396313364, "grad_norm": 0.2521365970000775, "learning_rate": 4.2760495700556405e-05, "loss": 0.3837, "num_tokens": 386306689.0, "step": 2025 }, { "epoch": 0.6915855948114012, "grad_norm": 0.24350360012284622, "learning_rate": 4.275417298937785e-05, "loss": 0.3642, "num_tokens": 386492836.0, "step": 2026 }, { "epoch": 0.6919269499914661, "grad_norm": 0.23017262959188584, "learning_rate": 4.274785027819929e-05, "loss": 0.3754, "num_tokens": 386692313.0, "step": 2027 }, { "epoch": 0.6922683051715309, "grad_norm": 0.2443349658824139, "learning_rate": 4.274152756702074e-05, "loss": 0.3895, "num_tokens": 386904951.0, "step": 2028 }, { "epoch": 0.6926096603515959, "grad_norm": 0.2645216194841818, "learning_rate": 4.2735204855842186e-05, "loss": 0.3711, "num_tokens": 387066175.0, "step": 2029 }, { "epoch": 0.6929510155316607, "grad_norm": 0.23852996401525384, "learning_rate": 4.272888214466363e-05, "loss": 0.3594, "num_tokens": 387215404.0, "step": 2030 }, { "epoch": 0.6932923707117256, "grad_norm": 0.24224104319843087, "learning_rate": 4.272255943348508e-05, "loss": 0.3617, "num_tokens": 387399159.0, "step": 2031 }, { "epoch": 0.6936337258917904, "grad_norm": 0.25263148417214903, "learning_rate": 4.271623672230653e-05, "loss": 0.3633, "num_tokens": 387556470.0, "step": 2032 }, { "epoch": 0.6939750810718552, "grad_norm": 0.3476375800111183, "learning_rate": 4.2709914011127974e-05, "loss": 0.3787, "num_tokens": 387780236.0, "step": 2033 }, { "epoch": 0.6943164362519201, "grad_norm": 0.22847111982477628, "learning_rate": 4.270359129994942e-05, "loss": 0.3695, "num_tokens": 387965115.0, "step": 2034 }, { "epoch": 0.694657791431985, "grad_norm": 0.24494946694270947, "learning_rate": 4.269726858877087e-05, "loss": 0.3904, "num_tokens": 388154704.0, "step": 2035 }, { "epoch": 0.6949991466120499, "grad_norm": 0.2505216369041753, "learning_rate": 4.2690945877592315e-05, "loss": 0.3668, "num_tokens": 388354830.0, "step": 2036 }, { "epoch": 0.6953405017921147, "grad_norm": 0.2227386336997381, "learning_rate": 4.268462316641376e-05, "loss": 0.3351, "num_tokens": 388541307.0, "step": 2037 }, { "epoch": 0.6956818569721795, "grad_norm": 0.2486146909117541, "learning_rate": 4.267830045523521e-05, "loss": 0.3698, "num_tokens": 388699501.0, "step": 2038 }, { "epoch": 0.6960232121522444, "grad_norm": 0.2384953966700619, "learning_rate": 4.267197774405665e-05, "loss": 0.3757, "num_tokens": 388888173.0, "step": 2039 }, { "epoch": 0.6963645673323092, "grad_norm": 0.21851814200637304, "learning_rate": 4.2665655032878096e-05, "loss": 0.3692, "num_tokens": 389089091.0, "step": 2040 }, { "epoch": 0.6967059225123742, "grad_norm": 0.22499441463508568, "learning_rate": 4.265933232169955e-05, "loss": 0.3471, "num_tokens": 389273382.0, "step": 2041 }, { "epoch": 0.697047277692439, "grad_norm": 0.24334758619951463, "learning_rate": 4.2653009610521e-05, "loss": 0.3667, "num_tokens": 389464170.0, "step": 2042 }, { "epoch": 0.6973886328725039, "grad_norm": 0.28295998902218095, "learning_rate": 4.2646686899342444e-05, "loss": 0.3754, "num_tokens": 389623834.0, "step": 2043 }, { "epoch": 0.6977299880525687, "grad_norm": 0.22101042124060857, "learning_rate": 4.264036418816389e-05, "loss": 0.3636, "num_tokens": 389842079.0, "step": 2044 }, { "epoch": 0.6980713432326335, "grad_norm": 0.6186109392883192, "learning_rate": 4.263404147698533e-05, "loss": 0.4259, "num_tokens": 390050395.0, "step": 2045 }, { "epoch": 0.6984126984126984, "grad_norm": 0.23763102856172558, "learning_rate": 4.262771876580678e-05, "loss": 0.4001, "num_tokens": 390249886.0, "step": 2046 }, { "epoch": 0.6987540535927633, "grad_norm": 0.21352156369291134, "learning_rate": 4.2621396054628225e-05, "loss": 0.3686, "num_tokens": 390480566.0, "step": 2047 }, { "epoch": 0.6990954087728282, "grad_norm": 0.2503916700338682, "learning_rate": 4.261507334344967e-05, "loss": 0.4051, "num_tokens": 390673941.0, "step": 2048 }, { "epoch": 0.699436763952893, "grad_norm": 0.23692759021948331, "learning_rate": 4.260875063227112e-05, "loss": 0.3862, "num_tokens": 390866030.0, "step": 2049 }, { "epoch": 0.6997781191329578, "grad_norm": 0.25132282016174473, "learning_rate": 4.2602427921092566e-05, "loss": 0.3911, "num_tokens": 391071497.0, "step": 2050 }, { "epoch": 0.7001194743130227, "grad_norm": 0.21946097333633927, "learning_rate": 4.259610520991401e-05, "loss": 0.3813, "num_tokens": 391267458.0, "step": 2051 }, { "epoch": 0.7004608294930875, "grad_norm": 0.21698455699949584, "learning_rate": 4.258978249873546e-05, "loss": 0.3874, "num_tokens": 391476761.0, "step": 2052 }, { "epoch": 0.7008021846731524, "grad_norm": 0.23180678747886616, "learning_rate": 4.258345978755691e-05, "loss": 0.3513, "num_tokens": 391647030.0, "step": 2053 }, { "epoch": 0.7011435398532173, "grad_norm": 0.21046387036269049, "learning_rate": 4.2577137076378354e-05, "loss": 0.3512, "num_tokens": 391879233.0, "step": 2054 }, { "epoch": 0.7014848950332822, "grad_norm": 0.2504295306459441, "learning_rate": 4.25708143651998e-05, "loss": 0.3949, "num_tokens": 392063623.0, "step": 2055 }, { "epoch": 0.701826250213347, "grad_norm": 0.23957521266322312, "learning_rate": 4.256449165402125e-05, "loss": 0.3795, "num_tokens": 392220151.0, "step": 2056 }, { "epoch": 0.7021676053934118, "grad_norm": 0.21357953790375897, "learning_rate": 4.255816894284269e-05, "loss": 0.3653, "num_tokens": 392417825.0, "step": 2057 }, { "epoch": 0.7025089605734767, "grad_norm": 0.3447992879824768, "learning_rate": 4.255184623166414e-05, "loss": 0.3899, "num_tokens": 392568877.0, "step": 2058 }, { "epoch": 0.7028503157535415, "grad_norm": 0.23559006671288887, "learning_rate": 4.254552352048559e-05, "loss": 0.3744, "num_tokens": 392761018.0, "step": 2059 }, { "epoch": 0.7031916709336065, "grad_norm": 0.23248963571262707, "learning_rate": 4.2539200809307036e-05, "loss": 0.3453, "num_tokens": 392927324.0, "step": 2060 }, { "epoch": 0.7035330261136713, "grad_norm": 0.2425574197928247, "learning_rate": 4.253287809812848e-05, "loss": 0.3793, "num_tokens": 393110068.0, "step": 2061 }, { "epoch": 0.7038743812937361, "grad_norm": 0.25672657825469175, "learning_rate": 4.252655538694993e-05, "loss": 0.3869, "num_tokens": 393275366.0, "step": 2062 }, { "epoch": 0.704215736473801, "grad_norm": 0.24451140038906757, "learning_rate": 4.252023267577137e-05, "loss": 0.407, "num_tokens": 393455218.0, "step": 2063 }, { "epoch": 0.7045570916538658, "grad_norm": 0.3119243628463542, "learning_rate": 4.251390996459282e-05, "loss": 0.3889, "num_tokens": 393636953.0, "step": 2064 }, { "epoch": 0.7048984468339307, "grad_norm": 0.23783278863224877, "learning_rate": 4.2507587253414264e-05, "loss": 0.3583, "num_tokens": 393802102.0, "step": 2065 }, { "epoch": 0.7052398020139956, "grad_norm": 0.2273296955166275, "learning_rate": 4.250126454223571e-05, "loss": 0.3869, "num_tokens": 394004934.0, "step": 2066 }, { "epoch": 0.7055811571940604, "grad_norm": 0.2328850639931361, "learning_rate": 4.249494183105716e-05, "loss": 0.3705, "num_tokens": 394184025.0, "step": 2067 }, { "epoch": 0.7059225123741253, "grad_norm": 0.22539520091258863, "learning_rate": 4.2488619119878605e-05, "loss": 0.3651, "num_tokens": 394381113.0, "step": 2068 }, { "epoch": 0.7062638675541901, "grad_norm": 0.2216283282991112, "learning_rate": 4.248229640870005e-05, "loss": 0.3606, "num_tokens": 394565499.0, "step": 2069 }, { "epoch": 0.706605222734255, "grad_norm": 0.23247295879885932, "learning_rate": 4.24759736975215e-05, "loss": 0.3799, "num_tokens": 394758497.0, "step": 2070 }, { "epoch": 0.7069465779143198, "grad_norm": 0.2555255413513552, "learning_rate": 4.2469650986342946e-05, "loss": 0.3915, "num_tokens": 394979614.0, "step": 2071 }, { "epoch": 0.7072879330943848, "grad_norm": 0.2552004690587055, "learning_rate": 4.246332827516439e-05, "loss": 0.3832, "num_tokens": 395181365.0, "step": 2072 }, { "epoch": 0.7076292882744496, "grad_norm": 0.2270967873690468, "learning_rate": 4.245700556398584e-05, "loss": 0.3563, "num_tokens": 395345869.0, "step": 2073 }, { "epoch": 0.7079706434545144, "grad_norm": 0.2396561000242682, "learning_rate": 4.245068285280728e-05, "loss": 0.3619, "num_tokens": 395534033.0, "step": 2074 }, { "epoch": 0.7083119986345793, "grad_norm": 0.25369982986226136, "learning_rate": 4.2444360141628734e-05, "loss": 0.3774, "num_tokens": 395715713.0, "step": 2075 }, { "epoch": 0.7086533538146441, "grad_norm": 0.2471332275849194, "learning_rate": 4.243803743045018e-05, "loss": 0.3969, "num_tokens": 395903717.0, "step": 2076 }, { "epoch": 0.708994708994709, "grad_norm": 0.2610541750328059, "learning_rate": 4.243171471927163e-05, "loss": 0.4056, "num_tokens": 396134572.0, "step": 2077 }, { "epoch": 0.7093360641747739, "grad_norm": 0.28815294741636943, "learning_rate": 4.2425392008093075e-05, "loss": 0.4151, "num_tokens": 396284052.0, "step": 2078 }, { "epoch": 0.7096774193548387, "grad_norm": 0.2582596205820117, "learning_rate": 4.241906929691452e-05, "loss": 0.425, "num_tokens": 396456296.0, "step": 2079 }, { "epoch": 0.7100187745349036, "grad_norm": 0.24827251178236368, "learning_rate": 4.241274658573596e-05, "loss": 0.3752, "num_tokens": 396651706.0, "step": 2080 }, { "epoch": 0.7103601297149684, "grad_norm": 0.2440638285661071, "learning_rate": 4.240642387455741e-05, "loss": 0.3655, "num_tokens": 396823236.0, "step": 2081 }, { "epoch": 0.7107014848950333, "grad_norm": 0.20524331736241253, "learning_rate": 4.2400101163378856e-05, "loss": 0.3613, "num_tokens": 397067576.0, "step": 2082 }, { "epoch": 0.7110428400750981, "grad_norm": 0.22836555934652825, "learning_rate": 4.23937784522003e-05, "loss": 0.3578, "num_tokens": 397274661.0, "step": 2083 }, { "epoch": 0.7113841952551629, "grad_norm": 0.23807536088262546, "learning_rate": 4.238745574102175e-05, "loss": 0.3833, "num_tokens": 397449119.0, "step": 2084 }, { "epoch": 0.7117255504352279, "grad_norm": 0.2698205417338035, "learning_rate": 4.2381133029843204e-05, "loss": 0.374, "num_tokens": 397638047.0, "step": 2085 }, { "epoch": 0.7120669056152927, "grad_norm": 0.21819707117049023, "learning_rate": 4.2374810318664644e-05, "loss": 0.372, "num_tokens": 397844170.0, "step": 2086 }, { "epoch": 0.7124082607953576, "grad_norm": 0.22301011154696235, "learning_rate": 4.236848760748609e-05, "loss": 0.3513, "num_tokens": 398018994.0, "step": 2087 }, { "epoch": 0.7127496159754224, "grad_norm": 0.23408071066457808, "learning_rate": 4.236216489630754e-05, "loss": 0.3422, "num_tokens": 398204248.0, "step": 2088 }, { "epoch": 0.7130909711554873, "grad_norm": 0.24353107217140407, "learning_rate": 4.2355842185128985e-05, "loss": 0.3959, "num_tokens": 398402462.0, "step": 2089 }, { "epoch": 0.7134323263355521, "grad_norm": 0.24484656360919074, "learning_rate": 4.234951947395043e-05, "loss": 0.3825, "num_tokens": 398582233.0, "step": 2090 }, { "epoch": 0.713773681515617, "grad_norm": 0.22880722885004778, "learning_rate": 4.234319676277188e-05, "loss": 0.3565, "num_tokens": 398752439.0, "step": 2091 }, { "epoch": 0.7141150366956819, "grad_norm": 0.2323508967139854, "learning_rate": 4.2336874051593326e-05, "loss": 0.3712, "num_tokens": 398925896.0, "step": 2092 }, { "epoch": 0.7144563918757467, "grad_norm": 0.22625290714303758, "learning_rate": 4.233055134041477e-05, "loss": 0.3612, "num_tokens": 399106073.0, "step": 2093 }, { "epoch": 0.7147977470558116, "grad_norm": 0.22828834240049556, "learning_rate": 4.232422862923622e-05, "loss": 0.3581, "num_tokens": 399297605.0, "step": 2094 }, { "epoch": 0.7151391022358764, "grad_norm": 0.23876590478340515, "learning_rate": 4.231790591805767e-05, "loss": 0.3897, "num_tokens": 399497381.0, "step": 2095 }, { "epoch": 0.7154804574159412, "grad_norm": 0.22634489434820354, "learning_rate": 4.2311583206879114e-05, "loss": 0.3867, "num_tokens": 399685178.0, "step": 2096 }, { "epoch": 0.7158218125960062, "grad_norm": 0.2615103005199435, "learning_rate": 4.230526049570056e-05, "loss": 0.4001, "num_tokens": 399856049.0, "step": 2097 }, { "epoch": 0.716163167776071, "grad_norm": 0.2916894464783133, "learning_rate": 4.2298937784522e-05, "loss": 0.4365, "num_tokens": 400062417.0, "step": 2098 }, { "epoch": 0.7165045229561359, "grad_norm": 0.254355073340298, "learning_rate": 4.229261507334345e-05, "loss": 0.3793, "num_tokens": 400243656.0, "step": 2099 }, { "epoch": 0.7168458781362007, "grad_norm": 0.23764681767048848, "learning_rate": 4.2286292362164895e-05, "loss": 0.3448, "num_tokens": 400412266.0, "step": 2100 }, { "epoch": 0.7171872333162655, "grad_norm": 0.2180134018967989, "learning_rate": 4.227996965098635e-05, "loss": 0.3746, "num_tokens": 400596732.0, "step": 2101 }, { "epoch": 0.7175285884963304, "grad_norm": 0.25587998736504713, "learning_rate": 4.2273646939807796e-05, "loss": 0.3768, "num_tokens": 400758121.0, "step": 2102 }, { "epoch": 0.7178699436763953, "grad_norm": 0.2362037234492445, "learning_rate": 4.226732422862924e-05, "loss": 0.3771, "num_tokens": 400968010.0, "step": 2103 }, { "epoch": 0.7182112988564602, "grad_norm": 0.2361547305671652, "learning_rate": 4.226100151745068e-05, "loss": 0.3404, "num_tokens": 401148037.0, "step": 2104 }, { "epoch": 0.718552654036525, "grad_norm": 0.22177310408282316, "learning_rate": 4.225467880627213e-05, "loss": 0.3452, "num_tokens": 401331159.0, "step": 2105 }, { "epoch": 0.7188940092165899, "grad_norm": 0.23708776875065712, "learning_rate": 4.224835609509358e-05, "loss": 0.3801, "num_tokens": 401523976.0, "step": 2106 }, { "epoch": 0.7192353643966547, "grad_norm": 0.24890320053479956, "learning_rate": 4.2242033383915024e-05, "loss": 0.3569, "num_tokens": 401666732.0, "step": 2107 }, { "epoch": 0.7195767195767195, "grad_norm": 0.21206157408190032, "learning_rate": 4.223571067273647e-05, "loss": 0.3587, "num_tokens": 401890983.0, "step": 2108 }, { "epoch": 0.7199180747567845, "grad_norm": 0.21704705492831292, "learning_rate": 4.222938796155792e-05, "loss": 0.3472, "num_tokens": 402088700.0, "step": 2109 }, { "epoch": 0.7202594299368493, "grad_norm": 0.23837350466612783, "learning_rate": 4.2223065250379365e-05, "loss": 0.3755, "num_tokens": 402278964.0, "step": 2110 }, { "epoch": 0.7206007851169142, "grad_norm": 0.23005938855211422, "learning_rate": 4.221674253920081e-05, "loss": 0.4183, "num_tokens": 402465326.0, "step": 2111 }, { "epoch": 0.720942140296979, "grad_norm": 0.22010296707472132, "learning_rate": 4.221041982802226e-05, "loss": 0.3774, "num_tokens": 402645422.0, "step": 2112 }, { "epoch": 0.7212834954770438, "grad_norm": 0.23596744000277284, "learning_rate": 4.2204097116843706e-05, "loss": 0.3499, "num_tokens": 402814137.0, "step": 2113 }, { "epoch": 0.7216248506571087, "grad_norm": 0.2245294128395849, "learning_rate": 4.219777440566515e-05, "loss": 0.3747, "num_tokens": 403008685.0, "step": 2114 }, { "epoch": 0.7219662058371735, "grad_norm": 0.2643955784317731, "learning_rate": 4.21914516944866e-05, "loss": 0.373, "num_tokens": 403179361.0, "step": 2115 }, { "epoch": 0.7223075610172385, "grad_norm": 0.2187258768661599, "learning_rate": 4.218512898330804e-05, "loss": 0.3668, "num_tokens": 403407617.0, "step": 2116 }, { "epoch": 0.7226489161973033, "grad_norm": 0.2601048568500074, "learning_rate": 4.217880627212949e-05, "loss": 0.4123, "num_tokens": 403594776.0, "step": 2117 }, { "epoch": 0.7229902713773682, "grad_norm": 0.21844830147742586, "learning_rate": 4.217248356095094e-05, "loss": 0.346, "num_tokens": 403767444.0, "step": 2118 }, { "epoch": 0.723331626557433, "grad_norm": 0.21772765709700775, "learning_rate": 4.216616084977239e-05, "loss": 0.3631, "num_tokens": 403971431.0, "step": 2119 }, { "epoch": 0.7236729817374978, "grad_norm": 0.24242771927849383, "learning_rate": 4.2159838138593834e-05, "loss": 0.394, "num_tokens": 404166716.0, "step": 2120 }, { "epoch": 0.7240143369175627, "grad_norm": 0.20975637343063813, "learning_rate": 4.215351542741528e-05, "loss": 0.3858, "num_tokens": 404396130.0, "step": 2121 }, { "epoch": 0.7243556920976276, "grad_norm": 0.2440663701250355, "learning_rate": 4.214719271623672e-05, "loss": 0.3767, "num_tokens": 404560984.0, "step": 2122 }, { "epoch": 0.7246970472776925, "grad_norm": 0.20864198281095542, "learning_rate": 4.214087000505817e-05, "loss": 0.3385, "num_tokens": 404760723.0, "step": 2123 }, { "epoch": 0.7250384024577573, "grad_norm": 0.23165484810497364, "learning_rate": 4.2134547293879616e-05, "loss": 0.3646, "num_tokens": 404938254.0, "step": 2124 }, { "epoch": 0.7253797576378221, "grad_norm": 0.2451326572508676, "learning_rate": 4.212822458270106e-05, "loss": 0.3962, "num_tokens": 405102753.0, "step": 2125 }, { "epoch": 0.725721112817887, "grad_norm": 0.2276652441977378, "learning_rate": 4.212190187152251e-05, "loss": 0.3801, "num_tokens": 405310725.0, "step": 2126 }, { "epoch": 0.7260624679979518, "grad_norm": 0.2307017823760436, "learning_rate": 4.2115579160343957e-05, "loss": 0.3561, "num_tokens": 405476007.0, "step": 2127 }, { "epoch": 0.7264038231780168, "grad_norm": 0.2270531415858552, "learning_rate": 4.2109256449165404e-05, "loss": 0.3524, "num_tokens": 405641015.0, "step": 2128 }, { "epoch": 0.7267451783580816, "grad_norm": 0.2148338606871308, "learning_rate": 4.210293373798685e-05, "loss": 0.4046, "num_tokens": 405885518.0, "step": 2129 }, { "epoch": 0.7270865335381465, "grad_norm": 0.229518887093417, "learning_rate": 4.20966110268083e-05, "loss": 0.3966, "num_tokens": 406089738.0, "step": 2130 }, { "epoch": 0.7274278887182113, "grad_norm": 0.23799774381210673, "learning_rate": 4.2090288315629745e-05, "loss": 0.4052, "num_tokens": 406281870.0, "step": 2131 }, { "epoch": 0.7277692438982761, "grad_norm": 0.22186413334485092, "learning_rate": 4.208396560445119e-05, "loss": 0.3543, "num_tokens": 406454183.0, "step": 2132 }, { "epoch": 0.728110599078341, "grad_norm": 0.24486548412352213, "learning_rate": 4.207764289327264e-05, "loss": 0.3675, "num_tokens": 406663567.0, "step": 2133 }, { "epoch": 0.7284519542584059, "grad_norm": 0.21817943349961275, "learning_rate": 4.207132018209408e-05, "loss": 0.3809, "num_tokens": 406915559.0, "step": 2134 }, { "epoch": 0.7287933094384708, "grad_norm": 0.2438342165478739, "learning_rate": 4.206499747091553e-05, "loss": 0.3723, "num_tokens": 407093030.0, "step": 2135 }, { "epoch": 0.7291346646185356, "grad_norm": 0.23444786811387225, "learning_rate": 4.205867475973698e-05, "loss": 0.3702, "num_tokens": 407268788.0, "step": 2136 }, { "epoch": 0.7294760197986004, "grad_norm": 0.21915099044665245, "learning_rate": 4.2052352048558426e-05, "loss": 0.3596, "num_tokens": 407496504.0, "step": 2137 }, { "epoch": 0.7298173749786653, "grad_norm": 0.225562657586173, "learning_rate": 4.2046029337379873e-05, "loss": 0.3629, "num_tokens": 407676636.0, "step": 2138 }, { "epoch": 0.7301587301587301, "grad_norm": 0.22738511411024598, "learning_rate": 4.203970662620132e-05, "loss": 0.408, "num_tokens": 407887459.0, "step": 2139 }, { "epoch": 0.7305000853387951, "grad_norm": 0.22343545658199898, "learning_rate": 4.203338391502276e-05, "loss": 0.3522, "num_tokens": 408060278.0, "step": 2140 }, { "epoch": 0.7308414405188599, "grad_norm": 0.2252636304053176, "learning_rate": 4.202706120384421e-05, "loss": 0.3754, "num_tokens": 408272962.0, "step": 2141 }, { "epoch": 0.7311827956989247, "grad_norm": 0.23195817752730172, "learning_rate": 4.2020738492665655e-05, "loss": 0.3722, "num_tokens": 408445409.0, "step": 2142 }, { "epoch": 0.7315241508789896, "grad_norm": 0.25043806098773147, "learning_rate": 4.20144157814871e-05, "loss": 0.3849, "num_tokens": 408609721.0, "step": 2143 }, { "epoch": 0.7318655060590544, "grad_norm": 0.24614509558368677, "learning_rate": 4.200809307030855e-05, "loss": 0.3937, "num_tokens": 408785040.0, "step": 2144 }, { "epoch": 0.7322068612391193, "grad_norm": 0.25644917274175105, "learning_rate": 4.200177035913e-05, "loss": 0.3752, "num_tokens": 408956774.0, "step": 2145 }, { "epoch": 0.7325482164191841, "grad_norm": 0.23806382003981877, "learning_rate": 4.199544764795144e-05, "loss": 0.3957, "num_tokens": 409159687.0, "step": 2146 }, { "epoch": 0.7328895715992491, "grad_norm": 0.21064589008926587, "learning_rate": 4.198912493677289e-05, "loss": 0.3417, "num_tokens": 409327328.0, "step": 2147 }, { "epoch": 0.7332309267793139, "grad_norm": 0.2691303077338568, "learning_rate": 4.1982802225594336e-05, "loss": 0.4018, "num_tokens": 409562503.0, "step": 2148 }, { "epoch": 0.7335722819593787, "grad_norm": 0.2357561101824314, "learning_rate": 4.1976479514415783e-05, "loss": 0.3863, "num_tokens": 409764744.0, "step": 2149 }, { "epoch": 0.7339136371394436, "grad_norm": 0.2580273651549136, "learning_rate": 4.197015680323723e-05, "loss": 0.3942, "num_tokens": 409915551.0, "step": 2150 }, { "epoch": 0.7342549923195084, "grad_norm": 0.259064296697188, "learning_rate": 4.196383409205868e-05, "loss": 0.4033, "num_tokens": 410138578.0, "step": 2151 }, { "epoch": 0.7345963474995733, "grad_norm": 0.21195617576405487, "learning_rate": 4.1957511380880124e-05, "loss": 0.3421, "num_tokens": 410340238.0, "step": 2152 }, { "epoch": 0.7349377026796382, "grad_norm": 0.24750351586999375, "learning_rate": 4.195118866970157e-05, "loss": 0.3775, "num_tokens": 410513946.0, "step": 2153 }, { "epoch": 0.735279057859703, "grad_norm": 0.21486321514834064, "learning_rate": 4.194486595852302e-05, "loss": 0.3448, "num_tokens": 410702753.0, "step": 2154 }, { "epoch": 0.7356204130397679, "grad_norm": 0.23577370669510692, "learning_rate": 4.1938543247344465e-05, "loss": 0.4017, "num_tokens": 410896699.0, "step": 2155 }, { "epoch": 0.7359617682198327, "grad_norm": 0.23179297951902106, "learning_rate": 4.193222053616591e-05, "loss": 0.3715, "num_tokens": 411057791.0, "step": 2156 }, { "epoch": 0.7363031233998976, "grad_norm": 0.24448075282230924, "learning_rate": 4.192589782498736e-05, "loss": 0.3732, "num_tokens": 411235923.0, "step": 2157 }, { "epoch": 0.7366444785799624, "grad_norm": 0.2344822191242064, "learning_rate": 4.19195751138088e-05, "loss": 0.381, "num_tokens": 411428103.0, "step": 2158 }, { "epoch": 0.7369858337600274, "grad_norm": 0.21261233734937862, "learning_rate": 4.1913252402630246e-05, "loss": 0.375, "num_tokens": 411650799.0, "step": 2159 }, { "epoch": 0.7373271889400922, "grad_norm": 0.20985504355609919, "learning_rate": 4.1906929691451693e-05, "loss": 0.3678, "num_tokens": 411854102.0, "step": 2160 }, { "epoch": 0.737668544120157, "grad_norm": 0.26090289586022825, "learning_rate": 4.190060698027314e-05, "loss": 0.4139, "num_tokens": 412097383.0, "step": 2161 }, { "epoch": 0.7380098993002219, "grad_norm": 0.23942771568758978, "learning_rate": 4.1894284269094594e-05, "loss": 0.3805, "num_tokens": 412275574.0, "step": 2162 }, { "epoch": 0.7383512544802867, "grad_norm": 0.23903957962965286, "learning_rate": 4.188796155791604e-05, "loss": 0.3897, "num_tokens": 412473390.0, "step": 2163 }, { "epoch": 0.7386926096603516, "grad_norm": 0.2630020571967151, "learning_rate": 4.188163884673748e-05, "loss": 0.3636, "num_tokens": 412663963.0, "step": 2164 }, { "epoch": 0.7390339648404165, "grad_norm": 0.2756218111322388, "learning_rate": 4.187531613555893e-05, "loss": 0.388, "num_tokens": 412818714.0, "step": 2165 }, { "epoch": 0.7393753200204813, "grad_norm": 0.2148537203451225, "learning_rate": 4.1868993424380375e-05, "loss": 0.3636, "num_tokens": 413032627.0, "step": 2166 }, { "epoch": 0.7397166752005462, "grad_norm": 0.23229977447457206, "learning_rate": 4.186267071320182e-05, "loss": 0.3929, "num_tokens": 413230403.0, "step": 2167 }, { "epoch": 0.740058030380611, "grad_norm": 0.23344508096827213, "learning_rate": 4.185634800202327e-05, "loss": 0.4025, "num_tokens": 413454701.0, "step": 2168 }, { "epoch": 0.7403993855606759, "grad_norm": 0.22848388002053885, "learning_rate": 4.1850025290844716e-05, "loss": 0.3421, "num_tokens": 413623028.0, "step": 2169 }, { "epoch": 0.7407407407407407, "grad_norm": 0.23299562807609195, "learning_rate": 4.184370257966616e-05, "loss": 0.3912, "num_tokens": 413824322.0, "step": 2170 }, { "epoch": 0.7410820959208057, "grad_norm": 0.2434953725416651, "learning_rate": 4.183737986848761e-05, "loss": 0.3793, "num_tokens": 414012712.0, "step": 2171 }, { "epoch": 0.7414234511008705, "grad_norm": 0.22819952711453947, "learning_rate": 4.183105715730906e-05, "loss": 0.3795, "num_tokens": 414186166.0, "step": 2172 }, { "epoch": 0.7417648062809353, "grad_norm": 0.24876241424087123, "learning_rate": 4.1824734446130504e-05, "loss": 0.3919, "num_tokens": 414369133.0, "step": 2173 }, { "epoch": 0.7421061614610002, "grad_norm": 0.2310880711118499, "learning_rate": 4.181841173495195e-05, "loss": 0.3359, "num_tokens": 414540726.0, "step": 2174 }, { "epoch": 0.742447516641065, "grad_norm": 0.2192064079607221, "learning_rate": 4.18120890237734e-05, "loss": 0.3375, "num_tokens": 414739283.0, "step": 2175 }, { "epoch": 0.7427888718211298, "grad_norm": 0.241664078206073, "learning_rate": 4.180576631259484e-05, "loss": 0.4068, "num_tokens": 414927203.0, "step": 2176 }, { "epoch": 0.7431302270011948, "grad_norm": 0.20895245835547904, "learning_rate": 4.1799443601416285e-05, "loss": 0.4114, "num_tokens": 415205352.0, "step": 2177 }, { "epoch": 0.7434715821812596, "grad_norm": 0.22396616894883578, "learning_rate": 4.179312089023774e-05, "loss": 0.3924, "num_tokens": 415413454.0, "step": 2178 }, { "epoch": 0.7438129373613245, "grad_norm": 0.22857365455709083, "learning_rate": 4.1786798179059186e-05, "loss": 0.3529, "num_tokens": 415617038.0, "step": 2179 }, { "epoch": 0.7441542925413893, "grad_norm": 0.21301224156809453, "learning_rate": 4.178047546788063e-05, "loss": 0.3656, "num_tokens": 415836494.0, "step": 2180 }, { "epoch": 0.7444956477214542, "grad_norm": 0.22670274894146564, "learning_rate": 4.177415275670208e-05, "loss": 0.3869, "num_tokens": 416019229.0, "step": 2181 }, { "epoch": 0.744837002901519, "grad_norm": 0.2395550657843444, "learning_rate": 4.176783004552352e-05, "loss": 0.3917, "num_tokens": 416236110.0, "step": 2182 }, { "epoch": 0.7451783580815838, "grad_norm": 0.23887944174025447, "learning_rate": 4.176150733434497e-05, "loss": 0.3686, "num_tokens": 416409861.0, "step": 2183 }, { "epoch": 0.7455197132616488, "grad_norm": 0.2428913218488811, "learning_rate": 4.1755184623166414e-05, "loss": 0.3735, "num_tokens": 416570412.0, "step": 2184 }, { "epoch": 0.7458610684417136, "grad_norm": 0.21239553938518932, "learning_rate": 4.174886191198786e-05, "loss": 0.3523, "num_tokens": 416753195.0, "step": 2185 }, { "epoch": 0.7462024236217785, "grad_norm": 0.45242158800214155, "learning_rate": 4.174253920080931e-05, "loss": 0.4084, "num_tokens": 416969316.0, "step": 2186 }, { "epoch": 0.7465437788018433, "grad_norm": 0.23194360863963187, "learning_rate": 4.1736216489630755e-05, "loss": 0.3463, "num_tokens": 417129074.0, "step": 2187 }, { "epoch": 0.7468851339819081, "grad_norm": 0.21530384018880158, "learning_rate": 4.17298937784522e-05, "loss": 0.3725, "num_tokens": 417330797.0, "step": 2188 }, { "epoch": 0.747226489161973, "grad_norm": 0.21259108867362245, "learning_rate": 4.172357106727365e-05, "loss": 0.3718, "num_tokens": 417564730.0, "step": 2189 }, { "epoch": 0.7475678443420379, "grad_norm": 0.25397760037133504, "learning_rate": 4.1717248356095096e-05, "loss": 0.3786, "num_tokens": 417728066.0, "step": 2190 }, { "epoch": 0.7479091995221028, "grad_norm": 0.22303835584238627, "learning_rate": 4.171092564491654e-05, "loss": 0.3472, "num_tokens": 417909310.0, "step": 2191 }, { "epoch": 0.7482505547021676, "grad_norm": 0.22848369853256187, "learning_rate": 4.170460293373799e-05, "loss": 0.3298, "num_tokens": 418076435.0, "step": 2192 }, { "epoch": 0.7485919098822325, "grad_norm": 0.2427364767498223, "learning_rate": 4.169828022255944e-05, "loss": 0.3622, "num_tokens": 418236555.0, "step": 2193 }, { "epoch": 0.7489332650622973, "grad_norm": 0.23118875539947803, "learning_rate": 4.169195751138088e-05, "loss": 0.351, "num_tokens": 418417762.0, "step": 2194 }, { "epoch": 0.7492746202423621, "grad_norm": 0.23506717394118085, "learning_rate": 4.168563480020233e-05, "loss": 0.3761, "num_tokens": 418586333.0, "step": 2195 }, { "epoch": 0.7496159754224271, "grad_norm": 0.23209409496546202, "learning_rate": 4.167931208902378e-05, "loss": 0.3499, "num_tokens": 418752061.0, "step": 2196 }, { "epoch": 0.7499573306024919, "grad_norm": 0.23869820613363066, "learning_rate": 4.1672989377845225e-05, "loss": 0.4056, "num_tokens": 418969955.0, "step": 2197 }, { "epoch": 0.7502986857825568, "grad_norm": 0.2638594030879724, "learning_rate": 4.166666666666667e-05, "loss": 0.3733, "num_tokens": 419139624.0, "step": 2198 }, { "epoch": 0.7506400409626216, "grad_norm": 0.27593109372148233, "learning_rate": 4.166034395548811e-05, "loss": 0.3867, "num_tokens": 419325155.0, "step": 2199 }, { "epoch": 0.7509813961426864, "grad_norm": 0.21994982265074778, "learning_rate": 4.165402124430956e-05, "loss": 0.3572, "num_tokens": 419511162.0, "step": 2200 }, { "epoch": 0.7513227513227513, "grad_norm": 0.21976571254570684, "learning_rate": 4.1647698533131006e-05, "loss": 0.3482, "num_tokens": 419700982.0, "step": 2201 }, { "epoch": 0.7516641065028162, "grad_norm": 0.252897926988204, "learning_rate": 4.164137582195245e-05, "loss": 0.3684, "num_tokens": 419843538.0, "step": 2202 }, { "epoch": 0.7520054616828811, "grad_norm": 0.25248676969084105, "learning_rate": 4.16350531107739e-05, "loss": 0.4001, "num_tokens": 420038154.0, "step": 2203 }, { "epoch": 0.7523468168629459, "grad_norm": 0.21977618141265778, "learning_rate": 4.162873039959535e-05, "loss": 0.3257, "num_tokens": 420239495.0, "step": 2204 }, { "epoch": 0.7526881720430108, "grad_norm": 0.24331815474247154, "learning_rate": 4.1622407688416794e-05, "loss": 0.3867, "num_tokens": 420450528.0, "step": 2205 }, { "epoch": 0.7530295272230756, "grad_norm": 0.23646837152034136, "learning_rate": 4.161608497723824e-05, "loss": 0.3527, "num_tokens": 420628674.0, "step": 2206 }, { "epoch": 0.7533708824031404, "grad_norm": 0.2204732912018039, "learning_rate": 4.160976226605969e-05, "loss": 0.3438, "num_tokens": 420800843.0, "step": 2207 }, { "epoch": 0.7537122375832054, "grad_norm": 0.23044933859152933, "learning_rate": 4.1603439554881135e-05, "loss": 0.3538, "num_tokens": 420967903.0, "step": 2208 }, { "epoch": 0.7540535927632702, "grad_norm": 0.25421043067640536, "learning_rate": 4.159711684370258e-05, "loss": 0.3864, "num_tokens": 421192863.0, "step": 2209 }, { "epoch": 0.7543949479433351, "grad_norm": 0.24461447894569124, "learning_rate": 4.159079413252403e-05, "loss": 0.342, "num_tokens": 421360831.0, "step": 2210 }, { "epoch": 0.7547363031233999, "grad_norm": 0.2644341113910285, "learning_rate": 4.158447142134547e-05, "loss": 0.378, "num_tokens": 421485775.0, "step": 2211 }, { "epoch": 0.7550776583034647, "grad_norm": 0.38673753357318613, "learning_rate": 4.157814871016692e-05, "loss": 0.4002, "num_tokens": 421663298.0, "step": 2212 }, { "epoch": 0.7554190134835296, "grad_norm": 0.23418677705388796, "learning_rate": 4.157182599898837e-05, "loss": 0.3667, "num_tokens": 421848878.0, "step": 2213 }, { "epoch": 0.7557603686635944, "grad_norm": 0.2417226208524211, "learning_rate": 4.156550328780982e-05, "loss": 0.3666, "num_tokens": 422030544.0, "step": 2214 }, { "epoch": 0.7561017238436594, "grad_norm": 0.225471574633711, "learning_rate": 4.1559180576631264e-05, "loss": 0.3605, "num_tokens": 422204573.0, "step": 2215 }, { "epoch": 0.7564430790237242, "grad_norm": 0.2771317388079084, "learning_rate": 4.155285786545271e-05, "loss": 0.3801, "num_tokens": 422388833.0, "step": 2216 }, { "epoch": 0.756784434203789, "grad_norm": 0.2525087990720659, "learning_rate": 4.154653515427415e-05, "loss": 0.3877, "num_tokens": 422582901.0, "step": 2217 }, { "epoch": 0.7571257893838539, "grad_norm": 0.24223064552262197, "learning_rate": 4.15402124430956e-05, "loss": 0.3543, "num_tokens": 422747142.0, "step": 2218 }, { "epoch": 0.7574671445639187, "grad_norm": 0.24181337548623957, "learning_rate": 4.1533889731917045e-05, "loss": 0.3836, "num_tokens": 422962167.0, "step": 2219 }, { "epoch": 0.7578084997439836, "grad_norm": 0.2538513296189922, "learning_rate": 4.152756702073849e-05, "loss": 0.36, "num_tokens": 423139917.0, "step": 2220 }, { "epoch": 0.7581498549240485, "grad_norm": 0.21968783163696454, "learning_rate": 4.152124430955994e-05, "loss": 0.3633, "num_tokens": 423331982.0, "step": 2221 }, { "epoch": 0.7584912101041134, "grad_norm": 0.2374864306896326, "learning_rate": 4.151492159838139e-05, "loss": 0.4027, "num_tokens": 423547154.0, "step": 2222 }, { "epoch": 0.7588325652841782, "grad_norm": 0.23064912148515332, "learning_rate": 4.150859888720283e-05, "loss": 0.3517, "num_tokens": 423725331.0, "step": 2223 }, { "epoch": 0.759173920464243, "grad_norm": 0.22204090817686176, "learning_rate": 4.150227617602428e-05, "loss": 0.368, "num_tokens": 423910057.0, "step": 2224 }, { "epoch": 0.7595152756443079, "grad_norm": 0.23034750883874391, "learning_rate": 4.149595346484573e-05, "loss": 0.3743, "num_tokens": 424071765.0, "step": 2225 }, { "epoch": 0.7598566308243727, "grad_norm": 0.23052557690481276, "learning_rate": 4.1489630753667174e-05, "loss": 0.3651, "num_tokens": 424259903.0, "step": 2226 }, { "epoch": 0.7601979860044377, "grad_norm": 0.21790099903847449, "learning_rate": 4.148330804248862e-05, "loss": 0.3335, "num_tokens": 424454741.0, "step": 2227 }, { "epoch": 0.7605393411845025, "grad_norm": 0.23490475041037714, "learning_rate": 4.147698533131007e-05, "loss": 0.3599, "num_tokens": 424628020.0, "step": 2228 }, { "epoch": 0.7608806963645673, "grad_norm": 0.30279951726408916, "learning_rate": 4.1470662620131515e-05, "loss": 0.3598, "num_tokens": 424779752.0, "step": 2229 }, { "epoch": 0.7612220515446322, "grad_norm": 0.24293348193357392, "learning_rate": 4.146433990895296e-05, "loss": 0.3979, "num_tokens": 424966112.0, "step": 2230 }, { "epoch": 0.761563406724697, "grad_norm": 0.2311065989939709, "learning_rate": 4.145801719777441e-05, "loss": 0.3919, "num_tokens": 425163846.0, "step": 2231 }, { "epoch": 0.7619047619047619, "grad_norm": 0.2548080611861048, "learning_rate": 4.1451694486595856e-05, "loss": 0.3454, "num_tokens": 425331078.0, "step": 2232 }, { "epoch": 0.7622461170848268, "grad_norm": 0.2545400132540436, "learning_rate": 4.14453717754173e-05, "loss": 0.3839, "num_tokens": 425531407.0, "step": 2233 }, { "epoch": 0.7625874722648917, "grad_norm": 0.2200358557685117, "learning_rate": 4.143904906423875e-05, "loss": 0.3723, "num_tokens": 425730878.0, "step": 2234 }, { "epoch": 0.7629288274449565, "grad_norm": 0.23023979875126027, "learning_rate": 4.143272635306019e-05, "loss": 0.3607, "num_tokens": 425927844.0, "step": 2235 }, { "epoch": 0.7632701826250213, "grad_norm": 0.21699845972564755, "learning_rate": 4.142640364188164e-05, "loss": 0.3573, "num_tokens": 426134693.0, "step": 2236 }, { "epoch": 0.7636115378050862, "grad_norm": 0.2507762613861895, "learning_rate": 4.1420080930703084e-05, "loss": 0.3798, "num_tokens": 426332752.0, "step": 2237 }, { "epoch": 0.763952892985151, "grad_norm": 0.24233113521240815, "learning_rate": 4.141375821952453e-05, "loss": 0.3849, "num_tokens": 426513535.0, "step": 2238 }, { "epoch": 0.764294248165216, "grad_norm": 0.2567169933049899, "learning_rate": 4.1407435508345985e-05, "loss": 0.339, "num_tokens": 426662540.0, "step": 2239 }, { "epoch": 0.7646356033452808, "grad_norm": 0.381764405744133, "learning_rate": 4.140111279716743e-05, "loss": 0.3429, "num_tokens": 426853778.0, "step": 2240 }, { "epoch": 0.7649769585253456, "grad_norm": 0.2281494359860197, "learning_rate": 4.139479008598887e-05, "loss": 0.3633, "num_tokens": 427024106.0, "step": 2241 }, { "epoch": 0.7653183137054105, "grad_norm": 0.24840035215087103, "learning_rate": 4.138846737481032e-05, "loss": 0.3865, "num_tokens": 427202798.0, "step": 2242 }, { "epoch": 0.7656596688854753, "grad_norm": 0.21980702216689643, "learning_rate": 4.1382144663631766e-05, "loss": 0.3699, "num_tokens": 427385375.0, "step": 2243 }, { "epoch": 0.7660010240655402, "grad_norm": 0.21726645313148948, "learning_rate": 4.137582195245321e-05, "loss": 0.3599, "num_tokens": 427579605.0, "step": 2244 }, { "epoch": 0.766342379245605, "grad_norm": 0.2118846458355601, "learning_rate": 4.136949924127466e-05, "loss": 0.3662, "num_tokens": 427781132.0, "step": 2245 }, { "epoch": 0.76668373442567, "grad_norm": 0.2327424369677816, "learning_rate": 4.136317653009611e-05, "loss": 0.3566, "num_tokens": 427951144.0, "step": 2246 }, { "epoch": 0.7670250896057348, "grad_norm": 0.22889415295692653, "learning_rate": 4.1356853818917554e-05, "loss": 0.3603, "num_tokens": 428133672.0, "step": 2247 }, { "epoch": 0.7673664447857996, "grad_norm": 0.21959007379725956, "learning_rate": 4.1350531107739e-05, "loss": 0.3843, "num_tokens": 428331505.0, "step": 2248 }, { "epoch": 0.7677077999658645, "grad_norm": 0.22029302981070253, "learning_rate": 4.134420839656045e-05, "loss": 0.359, "num_tokens": 428542967.0, "step": 2249 }, { "epoch": 0.7680491551459293, "grad_norm": 0.22602631752516644, "learning_rate": 4.1337885685381895e-05, "loss": 0.362, "num_tokens": 428781571.0, "step": 2250 }, { "epoch": 0.7683905103259941, "grad_norm": 0.214907912942968, "learning_rate": 4.133156297420334e-05, "loss": 0.3572, "num_tokens": 428986621.0, "step": 2251 }, { "epoch": 0.7687318655060591, "grad_norm": 0.21384762323766823, "learning_rate": 4.132524026302479e-05, "loss": 0.371, "num_tokens": 429195608.0, "step": 2252 }, { "epoch": 0.7690732206861239, "grad_norm": 0.2239132519222661, "learning_rate": 4.131891755184623e-05, "loss": 0.4137, "num_tokens": 429412534.0, "step": 2253 }, { "epoch": 0.7694145758661888, "grad_norm": 0.22907131518391055, "learning_rate": 4.1312594840667676e-05, "loss": 0.3333, "num_tokens": 429565453.0, "step": 2254 }, { "epoch": 0.7697559310462536, "grad_norm": 0.21941957342549845, "learning_rate": 4.130627212948913e-05, "loss": 0.3747, "num_tokens": 429771056.0, "step": 2255 }, { "epoch": 0.7700972862263185, "grad_norm": 0.21622300413536785, "learning_rate": 4.129994941831058e-05, "loss": 0.3294, "num_tokens": 429958508.0, "step": 2256 }, { "epoch": 0.7704386414063833, "grad_norm": 0.30154555147142353, "learning_rate": 4.1293626707132024e-05, "loss": 0.3759, "num_tokens": 430167237.0, "step": 2257 }, { "epoch": 0.7707799965864482, "grad_norm": 0.22775972122711405, "learning_rate": 4.128730399595347e-05, "loss": 0.3756, "num_tokens": 430379075.0, "step": 2258 }, { "epoch": 0.7711213517665131, "grad_norm": 0.2304607334272212, "learning_rate": 4.128098128477491e-05, "loss": 0.3328, "num_tokens": 430549033.0, "step": 2259 }, { "epoch": 0.7714627069465779, "grad_norm": 0.2893274518039365, "learning_rate": 4.127465857359636e-05, "loss": 0.3865, "num_tokens": 430736693.0, "step": 2260 }, { "epoch": 0.7718040621266428, "grad_norm": 0.2128872668759772, "learning_rate": 4.1268335862417805e-05, "loss": 0.3328, "num_tokens": 430912444.0, "step": 2261 }, { "epoch": 0.7721454173067076, "grad_norm": 0.23993857092682155, "learning_rate": 4.126201315123925e-05, "loss": 0.358, "num_tokens": 431080467.0, "step": 2262 }, { "epoch": 0.7724867724867724, "grad_norm": 0.2532797199505514, "learning_rate": 4.12556904400607e-05, "loss": 0.3964, "num_tokens": 431280700.0, "step": 2263 }, { "epoch": 0.7728281276668374, "grad_norm": 0.22047819789974876, "learning_rate": 4.1249367728882146e-05, "loss": 0.3717, "num_tokens": 431480199.0, "step": 2264 }, { "epoch": 0.7731694828469022, "grad_norm": 0.23727647535297994, "learning_rate": 4.124304501770359e-05, "loss": 0.4127, "num_tokens": 431672696.0, "step": 2265 }, { "epoch": 0.7735108380269671, "grad_norm": 0.23803826745581727, "learning_rate": 4.123672230652504e-05, "loss": 0.3853, "num_tokens": 431866882.0, "step": 2266 }, { "epoch": 0.7738521932070319, "grad_norm": 0.23156635265733647, "learning_rate": 4.123039959534649e-05, "loss": 0.3648, "num_tokens": 432077572.0, "step": 2267 }, { "epoch": 0.7741935483870968, "grad_norm": 0.2579404955681113, "learning_rate": 4.1224076884167934e-05, "loss": 0.365, "num_tokens": 432279438.0, "step": 2268 }, { "epoch": 0.7745349035671616, "grad_norm": 0.2369800005328791, "learning_rate": 4.121775417298938e-05, "loss": 0.3946, "num_tokens": 432464986.0, "step": 2269 }, { "epoch": 0.7748762587472265, "grad_norm": 0.22359871246087734, "learning_rate": 4.121143146181083e-05, "loss": 0.3545, "num_tokens": 432659991.0, "step": 2270 }, { "epoch": 0.7752176139272914, "grad_norm": 0.21752870739285865, "learning_rate": 4.120510875063227e-05, "loss": 0.3576, "num_tokens": 432864722.0, "step": 2271 }, { "epoch": 0.7755589691073562, "grad_norm": 0.25175265449198997, "learning_rate": 4.119878603945372e-05, "loss": 0.4024, "num_tokens": 433045599.0, "step": 2272 }, { "epoch": 0.7759003242874211, "grad_norm": 0.23804216024631533, "learning_rate": 4.119246332827517e-05, "loss": 0.3867, "num_tokens": 433249115.0, "step": 2273 }, { "epoch": 0.7762416794674859, "grad_norm": 0.2464556828765742, "learning_rate": 4.1186140617096616e-05, "loss": 0.4, "num_tokens": 433457538.0, "step": 2274 }, { "epoch": 0.7765830346475507, "grad_norm": 0.21571955603731466, "learning_rate": 4.117981790591806e-05, "loss": 0.3402, "num_tokens": 433660440.0, "step": 2275 }, { "epoch": 0.7769243898276156, "grad_norm": 0.2415995544774632, "learning_rate": 4.117349519473951e-05, "loss": 0.3838, "num_tokens": 433829515.0, "step": 2276 }, { "epoch": 0.7772657450076805, "grad_norm": 0.2366741238245742, "learning_rate": 4.116717248356095e-05, "loss": 0.3481, "num_tokens": 433980797.0, "step": 2277 }, { "epoch": 0.7776071001877454, "grad_norm": 0.20393352879845097, "learning_rate": 4.11608497723824e-05, "loss": 0.3185, "num_tokens": 434156664.0, "step": 2278 }, { "epoch": 0.7779484553678102, "grad_norm": 0.23399516336994344, "learning_rate": 4.1154527061203844e-05, "loss": 0.3596, "num_tokens": 434335886.0, "step": 2279 }, { "epoch": 0.778289810547875, "grad_norm": 0.2550642760373562, "learning_rate": 4.114820435002529e-05, "loss": 0.3698, "num_tokens": 434522886.0, "step": 2280 }, { "epoch": 0.7786311657279399, "grad_norm": 0.2201564365992986, "learning_rate": 4.114188163884674e-05, "loss": 0.3683, "num_tokens": 434741754.0, "step": 2281 }, { "epoch": 0.7789725209080047, "grad_norm": 0.22424839384441225, "learning_rate": 4.113555892766819e-05, "loss": 0.3622, "num_tokens": 434931662.0, "step": 2282 }, { "epoch": 0.7793138760880697, "grad_norm": 0.23008105026459943, "learning_rate": 4.112923621648963e-05, "loss": 0.3617, "num_tokens": 435125061.0, "step": 2283 }, { "epoch": 0.7796552312681345, "grad_norm": 0.2310187395890545, "learning_rate": 4.112291350531108e-05, "loss": 0.4067, "num_tokens": 435328926.0, "step": 2284 }, { "epoch": 0.7799965864481994, "grad_norm": 0.2611410093860993, "learning_rate": 4.1116590794132526e-05, "loss": 0.3822, "num_tokens": 435534314.0, "step": 2285 }, { "epoch": 0.7803379416282642, "grad_norm": 0.20641740184543697, "learning_rate": 4.111026808295397e-05, "loss": 0.3454, "num_tokens": 435740261.0, "step": 2286 }, { "epoch": 0.780679296808329, "grad_norm": 0.26317226190798015, "learning_rate": 4.110394537177542e-05, "loss": 0.3996, "num_tokens": 435941042.0, "step": 2287 }, { "epoch": 0.7810206519883939, "grad_norm": 0.22806453738083615, "learning_rate": 4.109762266059687e-05, "loss": 0.3713, "num_tokens": 436137968.0, "step": 2288 }, { "epoch": 0.7813620071684588, "grad_norm": 0.21865772694853436, "learning_rate": 4.1091299949418314e-05, "loss": 0.3644, "num_tokens": 436341294.0, "step": 2289 }, { "epoch": 0.7817033623485237, "grad_norm": 0.2387369785933517, "learning_rate": 4.108497723823976e-05, "loss": 0.3549, "num_tokens": 436508881.0, "step": 2290 }, { "epoch": 0.7820447175285885, "grad_norm": 0.2480363146516666, "learning_rate": 4.107865452706121e-05, "loss": 0.3636, "num_tokens": 436682233.0, "step": 2291 }, { "epoch": 0.7823860727086533, "grad_norm": 0.2315567184450892, "learning_rate": 4.1072331815882655e-05, "loss": 0.3534, "num_tokens": 436859607.0, "step": 2292 }, { "epoch": 0.7827274278887182, "grad_norm": 0.22073147903481777, "learning_rate": 4.10660091047041e-05, "loss": 0.3607, "num_tokens": 437044974.0, "step": 2293 }, { "epoch": 0.783068783068783, "grad_norm": 0.20922491440941463, "learning_rate": 4.105968639352555e-05, "loss": 0.3488, "num_tokens": 437233727.0, "step": 2294 }, { "epoch": 0.783410138248848, "grad_norm": 0.2200355319767209, "learning_rate": 4.105336368234699e-05, "loss": 0.3559, "num_tokens": 437433060.0, "step": 2295 }, { "epoch": 0.7837514934289128, "grad_norm": 0.25201285490536335, "learning_rate": 4.1047040971168436e-05, "loss": 0.4004, "num_tokens": 437660030.0, "step": 2296 }, { "epoch": 0.7840928486089777, "grad_norm": 0.2419403572357629, "learning_rate": 4.104071825998988e-05, "loss": 0.3545, "num_tokens": 437816847.0, "step": 2297 }, { "epoch": 0.7844342037890425, "grad_norm": 0.21049198362330782, "learning_rate": 4.103439554881133e-05, "loss": 0.3515, "num_tokens": 438005179.0, "step": 2298 }, { "epoch": 0.7847755589691073, "grad_norm": 0.2093642162917339, "learning_rate": 4.1028072837632783e-05, "loss": 0.3811, "num_tokens": 438233376.0, "step": 2299 }, { "epoch": 0.7851169141491722, "grad_norm": 0.21660807990791578, "learning_rate": 4.102175012645423e-05, "loss": 0.3783, "num_tokens": 438448292.0, "step": 2300 }, { "epoch": 0.7854582693292371, "grad_norm": 0.20867840426903345, "learning_rate": 4.101542741527567e-05, "loss": 0.3912, "num_tokens": 438693389.0, "step": 2301 }, { "epoch": 0.785799624509302, "grad_norm": 0.2176095399616992, "learning_rate": 4.100910470409712e-05, "loss": 0.3684, "num_tokens": 438898046.0, "step": 2302 }, { "epoch": 0.7861409796893668, "grad_norm": 0.20748430704213638, "learning_rate": 4.1002781992918565e-05, "loss": 0.3707, "num_tokens": 439139417.0, "step": 2303 }, { "epoch": 0.7864823348694316, "grad_norm": 0.23192371512760263, "learning_rate": 4.099645928174001e-05, "loss": 0.3892, "num_tokens": 439326217.0, "step": 2304 }, { "epoch": 0.7868236900494965, "grad_norm": 0.225084631204581, "learning_rate": 4.099013657056146e-05, "loss": 0.3867, "num_tokens": 439521475.0, "step": 2305 }, { "epoch": 0.7871650452295613, "grad_norm": 0.2182415918775231, "learning_rate": 4.0983813859382906e-05, "loss": 0.3795, "num_tokens": 439724245.0, "step": 2306 }, { "epoch": 0.7875064004096262, "grad_norm": 0.20659083797850855, "learning_rate": 4.097749114820435e-05, "loss": 0.3587, "num_tokens": 439947329.0, "step": 2307 }, { "epoch": 0.7878477555896911, "grad_norm": 0.23198260035424448, "learning_rate": 4.09711684370258e-05, "loss": 0.3662, "num_tokens": 440149753.0, "step": 2308 }, { "epoch": 0.788189110769756, "grad_norm": 0.22054555614954222, "learning_rate": 4.0964845725847247e-05, "loss": 0.3622, "num_tokens": 440359087.0, "step": 2309 }, { "epoch": 0.7885304659498208, "grad_norm": 0.21264886294297, "learning_rate": 4.0958523014668693e-05, "loss": 0.3581, "num_tokens": 440564559.0, "step": 2310 }, { "epoch": 0.7888718211298856, "grad_norm": 0.23667345656558075, "learning_rate": 4.095220030349014e-05, "loss": 0.3675, "num_tokens": 440727382.0, "step": 2311 }, { "epoch": 0.7892131763099505, "grad_norm": 0.20195838104240857, "learning_rate": 4.094587759231159e-05, "loss": 0.3867, "num_tokens": 440964996.0, "step": 2312 }, { "epoch": 0.7895545314900153, "grad_norm": 0.2344612898841937, "learning_rate": 4.093955488113303e-05, "loss": 0.3836, "num_tokens": 441157037.0, "step": 2313 }, { "epoch": 0.7898958866700803, "grad_norm": 0.22093815520757776, "learning_rate": 4.0933232169954475e-05, "loss": 0.3765, "num_tokens": 441364018.0, "step": 2314 }, { "epoch": 0.7902372418501451, "grad_norm": 0.21751633262151082, "learning_rate": 4.092690945877592e-05, "loss": 0.3773, "num_tokens": 441562280.0, "step": 2315 }, { "epoch": 0.7905785970302099, "grad_norm": 0.2142404423048734, "learning_rate": 4.0920586747597375e-05, "loss": 0.3903, "num_tokens": 441774104.0, "step": 2316 }, { "epoch": 0.7909199522102748, "grad_norm": 0.24264160434922077, "learning_rate": 4.091426403641882e-05, "loss": 0.3496, "num_tokens": 441932776.0, "step": 2317 }, { "epoch": 0.7912613073903396, "grad_norm": 0.2367312220757913, "learning_rate": 4.090794132524027e-05, "loss": 0.3825, "num_tokens": 442083050.0, "step": 2318 }, { "epoch": 0.7916026625704045, "grad_norm": 0.21754159503194354, "learning_rate": 4.090161861406171e-05, "loss": 0.373, "num_tokens": 442280195.0, "step": 2319 }, { "epoch": 0.7919440177504694, "grad_norm": 0.22935392940354415, "learning_rate": 4.0895295902883157e-05, "loss": 0.4037, "num_tokens": 442484867.0, "step": 2320 }, { "epoch": 0.7922853729305342, "grad_norm": 0.22776856175641658, "learning_rate": 4.0888973191704604e-05, "loss": 0.3918, "num_tokens": 442689827.0, "step": 2321 }, { "epoch": 0.7926267281105991, "grad_norm": 0.22328302523005367, "learning_rate": 4.088265048052605e-05, "loss": 0.3646, "num_tokens": 442887712.0, "step": 2322 }, { "epoch": 0.7929680832906639, "grad_norm": 0.23005842693524017, "learning_rate": 4.08763277693475e-05, "loss": 0.3662, "num_tokens": 443075932.0, "step": 2323 }, { "epoch": 0.7933094384707288, "grad_norm": 0.23012050144501295, "learning_rate": 4.0870005058168944e-05, "loss": 0.3476, "num_tokens": 443231674.0, "step": 2324 }, { "epoch": 0.7936507936507936, "grad_norm": 0.23389366157404343, "learning_rate": 4.086368234699039e-05, "loss": 0.3953, "num_tokens": 443414080.0, "step": 2325 }, { "epoch": 0.7939921488308586, "grad_norm": 0.19843300869194086, "learning_rate": 4.085735963581184e-05, "loss": 0.3545, "num_tokens": 443644107.0, "step": 2326 }, { "epoch": 0.7943335040109234, "grad_norm": 0.24802935393866513, "learning_rate": 4.0851036924633285e-05, "loss": 0.3955, "num_tokens": 443801808.0, "step": 2327 }, { "epoch": 0.7946748591909882, "grad_norm": 0.2274594658472311, "learning_rate": 4.084471421345473e-05, "loss": 0.3536, "num_tokens": 443986339.0, "step": 2328 }, { "epoch": 0.7950162143710531, "grad_norm": 0.2249541138837461, "learning_rate": 4.083839150227618e-05, "loss": 0.3921, "num_tokens": 444184647.0, "step": 2329 }, { "epoch": 0.7953575695511179, "grad_norm": 0.25884088672618905, "learning_rate": 4.083206879109762e-05, "loss": 0.3714, "num_tokens": 444310483.0, "step": 2330 }, { "epoch": 0.7956989247311828, "grad_norm": 0.2497479656896428, "learning_rate": 4.0825746079919067e-05, "loss": 0.3874, "num_tokens": 444522544.0, "step": 2331 }, { "epoch": 0.7960402799112477, "grad_norm": 0.22852144494885523, "learning_rate": 4.081942336874052e-05, "loss": 0.3711, "num_tokens": 444713842.0, "step": 2332 }, { "epoch": 0.7963816350913125, "grad_norm": 0.22252314994032898, "learning_rate": 4.081310065756197e-05, "loss": 0.3879, "num_tokens": 444952711.0, "step": 2333 }, { "epoch": 0.7967229902713774, "grad_norm": 0.26830721237738364, "learning_rate": 4.0806777946383414e-05, "loss": 0.4089, "num_tokens": 445138294.0, "step": 2334 }, { "epoch": 0.7970643454514422, "grad_norm": 0.2446498134555734, "learning_rate": 4.080045523520486e-05, "loss": 0.3445, "num_tokens": 445353199.0, "step": 2335 }, { "epoch": 0.7974057006315071, "grad_norm": 0.2242924703798101, "learning_rate": 4.07941325240263e-05, "loss": 0.3693, "num_tokens": 445545134.0, "step": 2336 }, { "epoch": 0.7977470558115719, "grad_norm": 0.23012792782072847, "learning_rate": 4.078780981284775e-05, "loss": 0.3599, "num_tokens": 445764501.0, "step": 2337 }, { "epoch": 0.7980884109916367, "grad_norm": 0.2306510201440804, "learning_rate": 4.0781487101669195e-05, "loss": 0.3454, "num_tokens": 445937123.0, "step": 2338 }, { "epoch": 0.7984297661717017, "grad_norm": 0.23425093280232617, "learning_rate": 4.077516439049064e-05, "loss": 0.3735, "num_tokens": 446139666.0, "step": 2339 }, { "epoch": 0.7987711213517665, "grad_norm": 0.25181955961107544, "learning_rate": 4.076884167931209e-05, "loss": 0.3537, "num_tokens": 446282695.0, "step": 2340 }, { "epoch": 0.7991124765318314, "grad_norm": 0.2408121496758225, "learning_rate": 4.0762518968133536e-05, "loss": 0.3686, "num_tokens": 446463321.0, "step": 2341 }, { "epoch": 0.7994538317118962, "grad_norm": 0.2600180900767226, "learning_rate": 4.0756196256954983e-05, "loss": 0.3849, "num_tokens": 446635356.0, "step": 2342 }, { "epoch": 0.799795186891961, "grad_norm": 0.2418732527524055, "learning_rate": 4.074987354577643e-05, "loss": 0.3379, "num_tokens": 446814004.0, "step": 2343 }, { "epoch": 0.8001365420720259, "grad_norm": 0.2415191972447903, "learning_rate": 4.074355083459788e-05, "loss": 0.3665, "num_tokens": 446983521.0, "step": 2344 }, { "epoch": 0.8004778972520908, "grad_norm": 0.24379639478157708, "learning_rate": 4.0737228123419324e-05, "loss": 0.3823, "num_tokens": 447153292.0, "step": 2345 }, { "epoch": 0.8008192524321557, "grad_norm": 0.22697351362232768, "learning_rate": 4.073090541224077e-05, "loss": 0.366, "num_tokens": 447349316.0, "step": 2346 }, { "epoch": 0.8011606076122205, "grad_norm": 0.25914588616582335, "learning_rate": 4.072458270106222e-05, "loss": 0.4074, "num_tokens": 447526517.0, "step": 2347 }, { "epoch": 0.8015019627922854, "grad_norm": 0.23543510592531713, "learning_rate": 4.071825998988366e-05, "loss": 0.3649, "num_tokens": 447722079.0, "step": 2348 }, { "epoch": 0.8018433179723502, "grad_norm": 0.30691292140754856, "learning_rate": 4.071193727870511e-05, "loss": 0.356, "num_tokens": 447910477.0, "step": 2349 }, { "epoch": 0.802184673152415, "grad_norm": 0.24024188818448297, "learning_rate": 4.070561456752656e-05, "loss": 0.3893, "num_tokens": 448092703.0, "step": 2350 }, { "epoch": 0.80252602833248, "grad_norm": 0.22726962406630277, "learning_rate": 4.0699291856348006e-05, "loss": 0.3956, "num_tokens": 448321395.0, "step": 2351 }, { "epoch": 0.8028673835125448, "grad_norm": 0.23134274610360775, "learning_rate": 4.069296914516945e-05, "loss": 0.3603, "num_tokens": 448501193.0, "step": 2352 }, { "epoch": 0.8032087386926097, "grad_norm": 0.2584992805679936, "learning_rate": 4.06866464339909e-05, "loss": 0.4117, "num_tokens": 448682734.0, "step": 2353 }, { "epoch": 0.8035500938726745, "grad_norm": 0.21900501513659554, "learning_rate": 4.068032372281234e-05, "loss": 0.3857, "num_tokens": 448908753.0, "step": 2354 }, { "epoch": 0.8038914490527393, "grad_norm": 0.23238086954624362, "learning_rate": 4.067400101163379e-05, "loss": 0.3914, "num_tokens": 449104124.0, "step": 2355 }, { "epoch": 0.8042328042328042, "grad_norm": 0.24029389827138242, "learning_rate": 4.0667678300455234e-05, "loss": 0.393, "num_tokens": 449249488.0, "step": 2356 }, { "epoch": 0.8045741594128691, "grad_norm": 0.22856873946172443, "learning_rate": 4.066135558927668e-05, "loss": 0.3796, "num_tokens": 449451208.0, "step": 2357 }, { "epoch": 0.804915514592934, "grad_norm": 0.25279370879513685, "learning_rate": 4.065503287809813e-05, "loss": 0.4307, "num_tokens": 449658048.0, "step": 2358 }, { "epoch": 0.8052568697729988, "grad_norm": 0.20536172006899506, "learning_rate": 4.064871016691958e-05, "loss": 0.3783, "num_tokens": 449896202.0, "step": 2359 }, { "epoch": 0.8055982249530637, "grad_norm": 0.2187228531003452, "learning_rate": 4.064238745574102e-05, "loss": 0.3711, "num_tokens": 450119952.0, "step": 2360 }, { "epoch": 0.8059395801331285, "grad_norm": 0.20625642481419615, "learning_rate": 4.063606474456247e-05, "loss": 0.3459, "num_tokens": 450356882.0, "step": 2361 }, { "epoch": 0.8062809353131933, "grad_norm": 0.2804195001352161, "learning_rate": 4.0629742033383916e-05, "loss": 0.3821, "num_tokens": 450495055.0, "step": 2362 }, { "epoch": 0.8066222904932583, "grad_norm": 0.21946942142334, "learning_rate": 4.062341932220536e-05, "loss": 0.3892, "num_tokens": 450745253.0, "step": 2363 }, { "epoch": 0.8069636456733231, "grad_norm": 0.23378727379039527, "learning_rate": 4.061709661102681e-05, "loss": 0.3785, "num_tokens": 450901483.0, "step": 2364 }, { "epoch": 0.807305000853388, "grad_norm": 0.23031850251745978, "learning_rate": 4.061077389984826e-05, "loss": 0.4014, "num_tokens": 451101844.0, "step": 2365 }, { "epoch": 0.8076463560334528, "grad_norm": 0.21247828488481832, "learning_rate": 4.0604451188669704e-05, "loss": 0.3656, "num_tokens": 451309441.0, "step": 2366 }, { "epoch": 0.8079877112135176, "grad_norm": 0.22075726845523508, "learning_rate": 4.059812847749115e-05, "loss": 0.3487, "num_tokens": 451485215.0, "step": 2367 }, { "epoch": 0.8083290663935825, "grad_norm": 0.2455630453349609, "learning_rate": 4.05918057663126e-05, "loss": 0.3923, "num_tokens": 451654679.0, "step": 2368 }, { "epoch": 0.8086704215736474, "grad_norm": 0.231300740324202, "learning_rate": 4.0585483055134045e-05, "loss": 0.3282, "num_tokens": 451812448.0, "step": 2369 }, { "epoch": 0.8090117767537123, "grad_norm": 0.22614558972455984, "learning_rate": 4.057916034395549e-05, "loss": 0.3804, "num_tokens": 451989617.0, "step": 2370 }, { "epoch": 0.8093531319337771, "grad_norm": 0.216201138700286, "learning_rate": 4.057283763277694e-05, "loss": 0.3742, "num_tokens": 452181097.0, "step": 2371 }, { "epoch": 0.809694487113842, "grad_norm": 0.21292604490031158, "learning_rate": 4.056651492159838e-05, "loss": 0.3626, "num_tokens": 452386679.0, "step": 2372 }, { "epoch": 0.8100358422939068, "grad_norm": 0.20564435252754756, "learning_rate": 4.0560192210419826e-05, "loss": 0.3622, "num_tokens": 452595839.0, "step": 2373 }, { "epoch": 0.8103771974739716, "grad_norm": 0.19766887047162301, "learning_rate": 4.055386949924127e-05, "loss": 0.3675, "num_tokens": 452830828.0, "step": 2374 }, { "epoch": 0.8107185526540365, "grad_norm": 0.21959496286142913, "learning_rate": 4.054754678806272e-05, "loss": 0.3528, "num_tokens": 453005075.0, "step": 2375 }, { "epoch": 0.8110599078341014, "grad_norm": 0.2261460254615041, "learning_rate": 4.0541224076884174e-05, "loss": 0.3486, "num_tokens": 453194719.0, "step": 2376 }, { "epoch": 0.8114012630141663, "grad_norm": 0.22809100036371616, "learning_rate": 4.053490136570562e-05, "loss": 0.3689, "num_tokens": 453380255.0, "step": 2377 }, { "epoch": 0.8117426181942311, "grad_norm": 0.22028214799096144, "learning_rate": 4.052857865452706e-05, "loss": 0.3722, "num_tokens": 453574959.0, "step": 2378 }, { "epoch": 0.8120839733742959, "grad_norm": 0.24520296205202183, "learning_rate": 4.052225594334851e-05, "loss": 0.3965, "num_tokens": 453744730.0, "step": 2379 }, { "epoch": 0.8124253285543608, "grad_norm": 0.24739840783760808, "learning_rate": 4.0515933232169955e-05, "loss": 0.3715, "num_tokens": 453923541.0, "step": 2380 }, { "epoch": 0.8127666837344256, "grad_norm": 0.21181807545329293, "learning_rate": 4.05096105209914e-05, "loss": 0.3518, "num_tokens": 454092813.0, "step": 2381 }, { "epoch": 0.8131080389144906, "grad_norm": 0.22364276177378326, "learning_rate": 4.050328780981285e-05, "loss": 0.3715, "num_tokens": 454290642.0, "step": 2382 }, { "epoch": 0.8134493940945554, "grad_norm": 0.2155148795492629, "learning_rate": 4.0496965098634296e-05, "loss": 0.3371, "num_tokens": 454458536.0, "step": 2383 }, { "epoch": 0.8137907492746203, "grad_norm": 0.21992726009705188, "learning_rate": 4.049064238745574e-05, "loss": 0.3863, "num_tokens": 454673783.0, "step": 2384 }, { "epoch": 0.8141321044546851, "grad_norm": 0.22574040996430875, "learning_rate": 4.048431967627719e-05, "loss": 0.3586, "num_tokens": 454885162.0, "step": 2385 }, { "epoch": 0.8144734596347499, "grad_norm": 0.22424315898602976, "learning_rate": 4.047799696509864e-05, "loss": 0.3456, "num_tokens": 455077809.0, "step": 2386 }, { "epoch": 0.8148148148148148, "grad_norm": 0.2440053160576042, "learning_rate": 4.0471674253920084e-05, "loss": 0.3323, "num_tokens": 455245417.0, "step": 2387 }, { "epoch": 0.8151561699948797, "grad_norm": 0.22978426597530952, "learning_rate": 4.046535154274153e-05, "loss": 0.3699, "num_tokens": 455424770.0, "step": 2388 }, { "epoch": 0.8154975251749446, "grad_norm": 0.21904229434330355, "learning_rate": 4.045902883156298e-05, "loss": 0.3585, "num_tokens": 455631140.0, "step": 2389 }, { "epoch": 0.8158388803550094, "grad_norm": 0.21182145776609543, "learning_rate": 4.045270612038442e-05, "loss": 0.3609, "num_tokens": 455853736.0, "step": 2390 }, { "epoch": 0.8161802355350742, "grad_norm": 0.2237464396511559, "learning_rate": 4.0446383409205865e-05, "loss": 0.3674, "num_tokens": 456031493.0, "step": 2391 }, { "epoch": 0.8165215907151391, "grad_norm": 0.23425969160247612, "learning_rate": 4.044006069802731e-05, "loss": 0.3591, "num_tokens": 456186765.0, "step": 2392 }, { "epoch": 0.8168629458952039, "grad_norm": 0.25950041103531574, "learning_rate": 4.0433737986848766e-05, "loss": 0.3684, "num_tokens": 456375177.0, "step": 2393 }, { "epoch": 0.8172043010752689, "grad_norm": 0.21639154381166076, "learning_rate": 4.042741527567021e-05, "loss": 0.3639, "num_tokens": 456576770.0, "step": 2394 }, { "epoch": 0.8175456562553337, "grad_norm": 0.34257580867399495, "learning_rate": 4.042109256449166e-05, "loss": 0.4113, "num_tokens": 456758685.0, "step": 2395 }, { "epoch": 0.8178870114353985, "grad_norm": 0.22952373423314879, "learning_rate": 4.04147698533131e-05, "loss": 0.3794, "num_tokens": 456950963.0, "step": 2396 }, { "epoch": 0.8182283666154634, "grad_norm": 0.22322749924868854, "learning_rate": 4.040844714213455e-05, "loss": 0.3528, "num_tokens": 457166380.0, "step": 2397 }, { "epoch": 0.8185697217955282, "grad_norm": 0.20732228325939472, "learning_rate": 4.0402124430955994e-05, "loss": 0.3672, "num_tokens": 457386629.0, "step": 2398 }, { "epoch": 0.8189110769755931, "grad_norm": 0.2287258169207251, "learning_rate": 4.039580171977744e-05, "loss": 0.4, "num_tokens": 457564252.0, "step": 2399 }, { "epoch": 0.819252432155658, "grad_norm": 0.229431365682315, "learning_rate": 4.038947900859889e-05, "loss": 0.3966, "num_tokens": 457758333.0, "step": 2400 }, { "epoch": 0.8195937873357229, "grad_norm": 0.2176130807935935, "learning_rate": 4.0383156297420335e-05, "loss": 0.3796, "num_tokens": 457971926.0, "step": 2401 }, { "epoch": 0.8199351425157877, "grad_norm": 0.21252459977765084, "learning_rate": 4.037683358624178e-05, "loss": 0.3808, "num_tokens": 458164402.0, "step": 2402 }, { "epoch": 0.8202764976958525, "grad_norm": 0.22410216386508908, "learning_rate": 4.037051087506323e-05, "loss": 0.3806, "num_tokens": 458339988.0, "step": 2403 }, { "epoch": 0.8206178528759174, "grad_norm": 0.2457973781644621, "learning_rate": 4.0364188163884676e-05, "loss": 0.3629, "num_tokens": 458500587.0, "step": 2404 }, { "epoch": 0.8209592080559822, "grad_norm": 0.2261156129753914, "learning_rate": 4.035786545270612e-05, "loss": 0.3967, "num_tokens": 458721164.0, "step": 2405 }, { "epoch": 0.821300563236047, "grad_norm": 0.21814355403448787, "learning_rate": 4.035154274152757e-05, "loss": 0.36, "num_tokens": 458933461.0, "step": 2406 }, { "epoch": 0.821641918416112, "grad_norm": 0.25498725480659074, "learning_rate": 4.034522003034902e-05, "loss": 0.355, "num_tokens": 459080611.0, "step": 2407 }, { "epoch": 0.8219832735961768, "grad_norm": 0.23921122469883532, "learning_rate": 4.033889731917046e-05, "loss": 0.3763, "num_tokens": 459273212.0, "step": 2408 }, { "epoch": 0.8223246287762417, "grad_norm": 0.21539960311886516, "learning_rate": 4.033257460799191e-05, "loss": 0.3723, "num_tokens": 459485906.0, "step": 2409 }, { "epoch": 0.8226659839563065, "grad_norm": 0.24885581937795406, "learning_rate": 4.032625189681336e-05, "loss": 0.4058, "num_tokens": 459656292.0, "step": 2410 }, { "epoch": 0.8230073391363714, "grad_norm": 0.251562804920763, "learning_rate": 4.0319929185634805e-05, "loss": 0.3704, "num_tokens": 459813302.0, "step": 2411 }, { "epoch": 0.8233486943164362, "grad_norm": 0.23837078879300883, "learning_rate": 4.031360647445625e-05, "loss": 0.3598, "num_tokens": 459975625.0, "step": 2412 }, { "epoch": 0.8236900494965012, "grad_norm": 0.22948251957852622, "learning_rate": 4.03072837632777e-05, "loss": 0.3349, "num_tokens": 460136272.0, "step": 2413 }, { "epoch": 0.824031404676566, "grad_norm": 0.23111340763704175, "learning_rate": 4.030096105209914e-05, "loss": 0.3724, "num_tokens": 460345373.0, "step": 2414 }, { "epoch": 0.8243727598566308, "grad_norm": 0.27015851840123395, "learning_rate": 4.0294638340920586e-05, "loss": 0.4028, "num_tokens": 460523852.0, "step": 2415 }, { "epoch": 0.8247141150366957, "grad_norm": 0.22436644367533282, "learning_rate": 4.028831562974203e-05, "loss": 0.3291, "num_tokens": 460696951.0, "step": 2416 }, { "epoch": 0.8250554702167605, "grad_norm": 0.2184551732479073, "learning_rate": 4.028199291856348e-05, "loss": 0.3779, "num_tokens": 460928894.0, "step": 2417 }, { "epoch": 0.8253968253968254, "grad_norm": 0.22393579365980837, "learning_rate": 4.027567020738493e-05, "loss": 0.3583, "num_tokens": 461137680.0, "step": 2418 }, { "epoch": 0.8257381805768903, "grad_norm": 0.23526120264206524, "learning_rate": 4.026934749620638e-05, "loss": 0.3523, "num_tokens": 461351921.0, "step": 2419 }, { "epoch": 0.8260795357569551, "grad_norm": 0.25482220901924396, "learning_rate": 4.026302478502782e-05, "loss": 0.3586, "num_tokens": 461481628.0, "step": 2420 }, { "epoch": 0.82642089093702, "grad_norm": 0.21359412981486106, "learning_rate": 4.025670207384927e-05, "loss": 0.362, "num_tokens": 461707597.0, "step": 2421 }, { "epoch": 0.8267622461170848, "grad_norm": 0.20774727415153327, "learning_rate": 4.0250379362670715e-05, "loss": 0.353, "num_tokens": 461922742.0, "step": 2422 }, { "epoch": 0.8271036012971497, "grad_norm": 0.22831389122411885, "learning_rate": 4.024405665149216e-05, "loss": 0.3977, "num_tokens": 462132944.0, "step": 2423 }, { "epoch": 0.8274449564772145, "grad_norm": 0.2309387856541029, "learning_rate": 4.023773394031361e-05, "loss": 0.3695, "num_tokens": 462326628.0, "step": 2424 }, { "epoch": 0.8277863116572794, "grad_norm": 0.227446763511147, "learning_rate": 4.0231411229135056e-05, "loss": 0.3522, "num_tokens": 462518432.0, "step": 2425 }, { "epoch": 0.8281276668373443, "grad_norm": 0.23408028692718025, "learning_rate": 4.02250885179565e-05, "loss": 0.3767, "num_tokens": 462713939.0, "step": 2426 }, { "epoch": 0.8284690220174091, "grad_norm": 0.22801555021413428, "learning_rate": 4.021876580677795e-05, "loss": 0.3321, "num_tokens": 462887336.0, "step": 2427 }, { "epoch": 0.828810377197474, "grad_norm": 0.22947982167924869, "learning_rate": 4.02124430955994e-05, "loss": 0.3453, "num_tokens": 463064204.0, "step": 2428 }, { "epoch": 0.8291517323775388, "grad_norm": 0.2418225173876075, "learning_rate": 4.0206120384420844e-05, "loss": 0.3478, "num_tokens": 463226373.0, "step": 2429 }, { "epoch": 0.8294930875576036, "grad_norm": 0.2412139354012248, "learning_rate": 4.019979767324229e-05, "loss": 0.3561, "num_tokens": 463409975.0, "step": 2430 }, { "epoch": 0.8298344427376686, "grad_norm": 0.22816037277794707, "learning_rate": 4.019347496206374e-05, "loss": 0.3962, "num_tokens": 463611264.0, "step": 2431 }, { "epoch": 0.8301757979177334, "grad_norm": 0.25009187102351427, "learning_rate": 4.018715225088518e-05, "loss": 0.3682, "num_tokens": 463807398.0, "step": 2432 }, { "epoch": 0.8305171530977983, "grad_norm": 0.23962257811274396, "learning_rate": 4.0180829539706625e-05, "loss": 0.3785, "num_tokens": 463976367.0, "step": 2433 }, { "epoch": 0.8308585082778631, "grad_norm": 0.23384158228492355, "learning_rate": 4.017450682852807e-05, "loss": 0.3616, "num_tokens": 464196616.0, "step": 2434 }, { "epoch": 0.831199863457928, "grad_norm": 0.23886930312174756, "learning_rate": 4.016818411734952e-05, "loss": 0.3593, "num_tokens": 464403288.0, "step": 2435 }, { "epoch": 0.8315412186379928, "grad_norm": 0.352191345325587, "learning_rate": 4.016186140617097e-05, "loss": 0.3864, "num_tokens": 464562701.0, "step": 2436 }, { "epoch": 0.8318825738180576, "grad_norm": 0.21734267410807148, "learning_rate": 4.015553869499242e-05, "loss": 0.38, "num_tokens": 464803774.0, "step": 2437 }, { "epoch": 0.8322239289981226, "grad_norm": 0.24114320210723933, "learning_rate": 4.014921598381386e-05, "loss": 0.4183, "num_tokens": 465012727.0, "step": 2438 }, { "epoch": 0.8325652841781874, "grad_norm": 0.2164672933510277, "learning_rate": 4.014289327263531e-05, "loss": 0.3302, "num_tokens": 465176802.0, "step": 2439 }, { "epoch": 0.8329066393582523, "grad_norm": 0.2131426295559042, "learning_rate": 4.0136570561456754e-05, "loss": 0.3724, "num_tokens": 465396525.0, "step": 2440 }, { "epoch": 0.8332479945383171, "grad_norm": 0.2671808539503567, "learning_rate": 4.01302478502782e-05, "loss": 0.3859, "num_tokens": 465538474.0, "step": 2441 }, { "epoch": 0.833589349718382, "grad_norm": 0.21589851410250063, "learning_rate": 4.012392513909965e-05, "loss": 0.3657, "num_tokens": 465749288.0, "step": 2442 }, { "epoch": 0.8339307048984468, "grad_norm": 0.2099158198844173, "learning_rate": 4.0117602427921095e-05, "loss": 0.3723, "num_tokens": 465966854.0, "step": 2443 }, { "epoch": 0.8342720600785117, "grad_norm": 0.21963785802436764, "learning_rate": 4.011127971674254e-05, "loss": 0.3943, "num_tokens": 466187632.0, "step": 2444 }, { "epoch": 0.8346134152585766, "grad_norm": 0.22017066090683501, "learning_rate": 4.010495700556399e-05, "loss": 0.3913, "num_tokens": 466391556.0, "step": 2445 }, { "epoch": 0.8349547704386414, "grad_norm": 0.24393225588558895, "learning_rate": 4.0098634294385436e-05, "loss": 0.3731, "num_tokens": 466606700.0, "step": 2446 }, { "epoch": 0.8352961256187063, "grad_norm": 0.21429273566668258, "learning_rate": 4.009231158320688e-05, "loss": 0.3527, "num_tokens": 466798051.0, "step": 2447 }, { "epoch": 0.8356374807987711, "grad_norm": 0.21278177818811406, "learning_rate": 4.008598887202833e-05, "loss": 0.3716, "num_tokens": 467006088.0, "step": 2448 }, { "epoch": 0.8359788359788359, "grad_norm": 0.20690253609984074, "learning_rate": 4.007966616084978e-05, "loss": 0.3493, "num_tokens": 467218732.0, "step": 2449 }, { "epoch": 0.8363201911589009, "grad_norm": 0.2866361345525707, "learning_rate": 4.007334344967122e-05, "loss": 0.3423, "num_tokens": 467380182.0, "step": 2450 }, { "epoch": 0.8366615463389657, "grad_norm": 0.23555666446674858, "learning_rate": 4.0067020738492664e-05, "loss": 0.348, "num_tokens": 467522177.0, "step": 2451 }, { "epoch": 0.8370029015190306, "grad_norm": 0.23671214854735342, "learning_rate": 4.006069802731411e-05, "loss": 0.3268, "num_tokens": 467672317.0, "step": 2452 }, { "epoch": 0.8373442566990954, "grad_norm": 0.2221234913385475, "learning_rate": 4.0054375316135565e-05, "loss": 0.3921, "num_tokens": 467878446.0, "step": 2453 }, { "epoch": 0.8376856118791602, "grad_norm": 0.22335423407366298, "learning_rate": 4.004805260495701e-05, "loss": 0.3469, "num_tokens": 468090336.0, "step": 2454 }, { "epoch": 0.8380269670592251, "grad_norm": 0.21658788372430537, "learning_rate": 4.004172989377846e-05, "loss": 0.3443, "num_tokens": 468262200.0, "step": 2455 }, { "epoch": 0.83836832223929, "grad_norm": 0.22783077440701924, "learning_rate": 4.00354071825999e-05, "loss": 0.3641, "num_tokens": 468432504.0, "step": 2456 }, { "epoch": 0.8387096774193549, "grad_norm": 0.2209058792888852, "learning_rate": 4.0029084471421346e-05, "loss": 0.3433, "num_tokens": 468624137.0, "step": 2457 }, { "epoch": 0.8390510325994197, "grad_norm": 0.23143444266961216, "learning_rate": 4.002276176024279e-05, "loss": 0.3931, "num_tokens": 468846551.0, "step": 2458 }, { "epoch": 0.8393923877794846, "grad_norm": 0.23822931874609524, "learning_rate": 4.001643904906424e-05, "loss": 0.3849, "num_tokens": 469007598.0, "step": 2459 }, { "epoch": 0.8397337429595494, "grad_norm": 0.20734988784841374, "learning_rate": 4.001011633788569e-05, "loss": 0.3493, "num_tokens": 469205403.0, "step": 2460 }, { "epoch": 0.8400750981396142, "grad_norm": 0.23774996696595713, "learning_rate": 4.0003793626707134e-05, "loss": 0.3448, "num_tokens": 469356215.0, "step": 2461 }, { "epoch": 0.8404164533196792, "grad_norm": 0.2272335258313039, "learning_rate": 3.999747091552858e-05, "loss": 0.3649, "num_tokens": 469537477.0, "step": 2462 }, { "epoch": 0.840757808499744, "grad_norm": 0.21807708854433464, "learning_rate": 3.999114820435003e-05, "loss": 0.3564, "num_tokens": 469730962.0, "step": 2463 }, { "epoch": 0.8410991636798089, "grad_norm": 0.26333792436527176, "learning_rate": 3.9984825493171475e-05, "loss": 0.3762, "num_tokens": 469899137.0, "step": 2464 }, { "epoch": 0.8414405188598737, "grad_norm": 0.21505612247014683, "learning_rate": 3.997850278199292e-05, "loss": 0.3753, "num_tokens": 470085826.0, "step": 2465 }, { "epoch": 0.8417818740399385, "grad_norm": 0.21982313282700608, "learning_rate": 3.997218007081437e-05, "loss": 0.3598, "num_tokens": 470295766.0, "step": 2466 }, { "epoch": 0.8421232292200034, "grad_norm": 0.22865327537805127, "learning_rate": 3.996585735963581e-05, "loss": 0.3707, "num_tokens": 470480647.0, "step": 2467 }, { "epoch": 0.8424645844000682, "grad_norm": 0.2070408274464669, "learning_rate": 3.9959534648457256e-05, "loss": 0.3498, "num_tokens": 470684322.0, "step": 2468 }, { "epoch": 0.8428059395801332, "grad_norm": 0.2215666658607193, "learning_rate": 3.99532119372787e-05, "loss": 0.3976, "num_tokens": 470889820.0, "step": 2469 }, { "epoch": 0.843147294760198, "grad_norm": 0.26678570716979566, "learning_rate": 3.9946889226100157e-05, "loss": 0.3536, "num_tokens": 471029339.0, "step": 2470 }, { "epoch": 0.8434886499402628, "grad_norm": 0.23643931472136473, "learning_rate": 3.9940566514921604e-05, "loss": 0.3727, "num_tokens": 471191715.0, "step": 2471 }, { "epoch": 0.8438300051203277, "grad_norm": 0.21349036527555307, "learning_rate": 3.993424380374305e-05, "loss": 0.3697, "num_tokens": 471398021.0, "step": 2472 }, { "epoch": 0.8441713603003925, "grad_norm": 0.22242914191391078, "learning_rate": 3.992792109256449e-05, "loss": 0.3397, "num_tokens": 471588400.0, "step": 2473 }, { "epoch": 0.8445127154804574, "grad_norm": 0.23210155924467496, "learning_rate": 3.992159838138594e-05, "loss": 0.3579, "num_tokens": 471746473.0, "step": 2474 }, { "epoch": 0.8448540706605223, "grad_norm": 0.2032716689760403, "learning_rate": 3.9915275670207385e-05, "loss": 0.3759, "num_tokens": 471987126.0, "step": 2475 }, { "epoch": 0.8451954258405872, "grad_norm": 0.2555950342221349, "learning_rate": 3.990895295902883e-05, "loss": 0.3995, "num_tokens": 472231355.0, "step": 2476 }, { "epoch": 0.845536781020652, "grad_norm": 0.21270749484409912, "learning_rate": 3.990263024785028e-05, "loss": 0.3742, "num_tokens": 472451038.0, "step": 2477 }, { "epoch": 0.8458781362007168, "grad_norm": 0.244989133877035, "learning_rate": 3.9896307536671726e-05, "loss": 0.3528, "num_tokens": 472623396.0, "step": 2478 }, { "epoch": 0.8462194913807817, "grad_norm": 0.22859457450802093, "learning_rate": 3.988998482549317e-05, "loss": 0.3391, "num_tokens": 472785633.0, "step": 2479 }, { "epoch": 0.8465608465608465, "grad_norm": 0.22238783637171805, "learning_rate": 3.988366211431462e-05, "loss": 0.3556, "num_tokens": 472987334.0, "step": 2480 }, { "epoch": 0.8469022017409115, "grad_norm": 0.23012681628466777, "learning_rate": 3.9877339403136067e-05, "loss": 0.3737, "num_tokens": 473180059.0, "step": 2481 }, { "epoch": 0.8472435569209763, "grad_norm": 0.22620809137349604, "learning_rate": 3.9871016691957514e-05, "loss": 0.3735, "num_tokens": 473401548.0, "step": 2482 }, { "epoch": 0.8475849121010411, "grad_norm": 0.24403572196733003, "learning_rate": 3.986469398077896e-05, "loss": 0.3955, "num_tokens": 473625861.0, "step": 2483 }, { "epoch": 0.847926267281106, "grad_norm": 0.22528369987567906, "learning_rate": 3.985837126960041e-05, "loss": 0.3583, "num_tokens": 473814160.0, "step": 2484 }, { "epoch": 0.8482676224611708, "grad_norm": 0.22361677570397867, "learning_rate": 3.985204855842185e-05, "loss": 0.3922, "num_tokens": 474021304.0, "step": 2485 }, { "epoch": 0.8486089776412357, "grad_norm": 0.2502905266066797, "learning_rate": 3.98457258472433e-05, "loss": 0.3752, "num_tokens": 474194706.0, "step": 2486 }, { "epoch": 0.8489503328213006, "grad_norm": 0.235379725509084, "learning_rate": 3.983940313606475e-05, "loss": 0.3815, "num_tokens": 474368839.0, "step": 2487 }, { "epoch": 0.8492916880013655, "grad_norm": 0.20745076519015193, "learning_rate": 3.9833080424886195e-05, "loss": 0.3427, "num_tokens": 474549129.0, "step": 2488 }, { "epoch": 0.8496330431814303, "grad_norm": 0.2518361331880008, "learning_rate": 3.982675771370764e-05, "loss": 0.4041, "num_tokens": 474721813.0, "step": 2489 }, { "epoch": 0.8499743983614951, "grad_norm": 0.2217475493126535, "learning_rate": 3.982043500252909e-05, "loss": 0.3582, "num_tokens": 474928493.0, "step": 2490 }, { "epoch": 0.85031575354156, "grad_norm": 0.24516512927762263, "learning_rate": 3.981411229135053e-05, "loss": 0.3617, "num_tokens": 475099995.0, "step": 2491 }, { "epoch": 0.8506571087216248, "grad_norm": 0.24214332786991474, "learning_rate": 3.980778958017198e-05, "loss": 0.4167, "num_tokens": 475313945.0, "step": 2492 }, { "epoch": 0.8509984639016898, "grad_norm": 0.2441999001616973, "learning_rate": 3.9801466868993424e-05, "loss": 0.388, "num_tokens": 475492654.0, "step": 2493 }, { "epoch": 0.8513398190817546, "grad_norm": 0.24973895080077507, "learning_rate": 3.979514415781487e-05, "loss": 0.3954, "num_tokens": 475655728.0, "step": 2494 }, { "epoch": 0.8516811742618194, "grad_norm": 0.23673588422104747, "learning_rate": 3.978882144663632e-05, "loss": 0.3483, "num_tokens": 475813004.0, "step": 2495 }, { "epoch": 0.8520225294418843, "grad_norm": 0.19994130652512343, "learning_rate": 3.978249873545777e-05, "loss": 0.3379, "num_tokens": 476006953.0, "step": 2496 }, { "epoch": 0.8523638846219491, "grad_norm": 0.2233179162788348, "learning_rate": 3.977617602427921e-05, "loss": 0.3709, "num_tokens": 476232032.0, "step": 2497 }, { "epoch": 0.852705239802014, "grad_norm": 0.26746314905623036, "learning_rate": 3.976985331310066e-05, "loss": 0.4007, "num_tokens": 476442274.0, "step": 2498 }, { "epoch": 0.8530465949820788, "grad_norm": 0.24423806272936288, "learning_rate": 3.9763530601922106e-05, "loss": 0.3838, "num_tokens": 476625509.0, "step": 2499 }, { "epoch": 0.8533879501621437, "grad_norm": 0.21833272476963106, "learning_rate": 3.975720789074355e-05, "loss": 0.3498, "num_tokens": 476809901.0, "step": 2500 }, { "epoch": 0.8537293053422086, "grad_norm": 0.2163893744010617, "learning_rate": 3.9750885179565e-05, "loss": 0.3713, "num_tokens": 477012071.0, "step": 2501 }, { "epoch": 0.8540706605222734, "grad_norm": 0.23954766764774887, "learning_rate": 3.9744562468386446e-05, "loss": 0.3746, "num_tokens": 477181704.0, "step": 2502 }, { "epoch": 0.8544120157023383, "grad_norm": 0.2197099939484837, "learning_rate": 3.9738239757207893e-05, "loss": 0.3742, "num_tokens": 477377105.0, "step": 2503 }, { "epoch": 0.8547533708824031, "grad_norm": 0.22945283868272745, "learning_rate": 3.973191704602934e-05, "loss": 0.3629, "num_tokens": 477561771.0, "step": 2504 }, { "epoch": 0.855094726062468, "grad_norm": 0.2276614888439586, "learning_rate": 3.972559433485079e-05, "loss": 0.3844, "num_tokens": 477770665.0, "step": 2505 }, { "epoch": 0.8554360812425329, "grad_norm": 0.20582884265656104, "learning_rate": 3.9719271623672234e-05, "loss": 0.3507, "num_tokens": 477976578.0, "step": 2506 }, { "epoch": 0.8557774364225977, "grad_norm": 0.2193671638025377, "learning_rate": 3.971294891249368e-05, "loss": 0.3621, "num_tokens": 478155856.0, "step": 2507 }, { "epoch": 0.8561187916026626, "grad_norm": 0.22261054064456118, "learning_rate": 3.970662620131513e-05, "loss": 0.3642, "num_tokens": 478360654.0, "step": 2508 }, { "epoch": 0.8564601467827274, "grad_norm": 0.24867137608739065, "learning_rate": 3.970030349013657e-05, "loss": 0.3824, "num_tokens": 478555665.0, "step": 2509 }, { "epoch": 0.8568015019627923, "grad_norm": 0.20345552908742118, "learning_rate": 3.9693980778958016e-05, "loss": 0.3735, "num_tokens": 478776090.0, "step": 2510 }, { "epoch": 0.8571428571428571, "grad_norm": 0.22639395821720978, "learning_rate": 3.968765806777946e-05, "loss": 0.3895, "num_tokens": 479039914.0, "step": 2511 }, { "epoch": 0.857484212322922, "grad_norm": 0.24537447770605922, "learning_rate": 3.968133535660091e-05, "loss": 0.3723, "num_tokens": 479228903.0, "step": 2512 }, { "epoch": 0.8578255675029869, "grad_norm": 0.2076422626308391, "learning_rate": 3.967501264542236e-05, "loss": 0.3823, "num_tokens": 479443919.0, "step": 2513 }, { "epoch": 0.8581669226830517, "grad_norm": 0.2102765162056105, "learning_rate": 3.966868993424381e-05, "loss": 0.3523, "num_tokens": 479626603.0, "step": 2514 }, { "epoch": 0.8585082778631166, "grad_norm": 0.22059285430223538, "learning_rate": 3.966236722306525e-05, "loss": 0.369, "num_tokens": 479801391.0, "step": 2515 }, { "epoch": 0.8588496330431814, "grad_norm": 0.21169761779927151, "learning_rate": 3.96560445118867e-05, "loss": 0.3795, "num_tokens": 480008875.0, "step": 2516 }, { "epoch": 0.8591909882232462, "grad_norm": 0.22457930510217922, "learning_rate": 3.9649721800708144e-05, "loss": 0.3656, "num_tokens": 480185633.0, "step": 2517 }, { "epoch": 0.8595323434033112, "grad_norm": 0.22949718533943692, "learning_rate": 3.964339908952959e-05, "loss": 0.3736, "num_tokens": 480372539.0, "step": 2518 }, { "epoch": 0.859873698583376, "grad_norm": 0.22910997607860734, "learning_rate": 3.963707637835104e-05, "loss": 0.3758, "num_tokens": 480550598.0, "step": 2519 }, { "epoch": 0.8602150537634409, "grad_norm": 0.2492603280014717, "learning_rate": 3.9630753667172485e-05, "loss": 0.3693, "num_tokens": 480745673.0, "step": 2520 }, { "epoch": 0.8605564089435057, "grad_norm": 0.22658337238210344, "learning_rate": 3.962443095599393e-05, "loss": 0.3829, "num_tokens": 480958600.0, "step": 2521 }, { "epoch": 0.8608977641235706, "grad_norm": 0.22435606382210077, "learning_rate": 3.961810824481538e-05, "loss": 0.3225, "num_tokens": 481155545.0, "step": 2522 }, { "epoch": 0.8612391193036354, "grad_norm": 0.24121325814775807, "learning_rate": 3.9611785533636826e-05, "loss": 0.3553, "num_tokens": 481319803.0, "step": 2523 }, { "epoch": 0.8615804744837003, "grad_norm": 0.23808902651937053, "learning_rate": 3.960546282245827e-05, "loss": 0.3806, "num_tokens": 481481060.0, "step": 2524 }, { "epoch": 0.8619218296637652, "grad_norm": 0.2156300476061187, "learning_rate": 3.959914011127972e-05, "loss": 0.3395, "num_tokens": 481650710.0, "step": 2525 }, { "epoch": 0.86226318484383, "grad_norm": 0.24010637213087652, "learning_rate": 3.959281740010117e-05, "loss": 0.3777, "num_tokens": 481817641.0, "step": 2526 }, { "epoch": 0.8626045400238949, "grad_norm": 0.2138670200652672, "learning_rate": 3.958649468892261e-05, "loss": 0.3677, "num_tokens": 482022165.0, "step": 2527 }, { "epoch": 0.8629458952039597, "grad_norm": 0.21971485575850472, "learning_rate": 3.9580171977744054e-05, "loss": 0.3731, "num_tokens": 482247441.0, "step": 2528 }, { "epoch": 0.8632872503840245, "grad_norm": 0.22795061058010155, "learning_rate": 3.95738492665655e-05, "loss": 0.3655, "num_tokens": 482431512.0, "step": 2529 }, { "epoch": 0.8636286055640894, "grad_norm": 0.22210099740565564, "learning_rate": 3.9567526555386955e-05, "loss": 0.3772, "num_tokens": 482612695.0, "step": 2530 }, { "epoch": 0.8639699607441543, "grad_norm": 0.22556174979208024, "learning_rate": 3.95612038442084e-05, "loss": 0.3791, "num_tokens": 482804028.0, "step": 2531 }, { "epoch": 0.8643113159242192, "grad_norm": 0.23371497947942768, "learning_rate": 3.955488113302985e-05, "loss": 0.3565, "num_tokens": 482978321.0, "step": 2532 }, { "epoch": 0.864652671104284, "grad_norm": 0.22544906660726532, "learning_rate": 3.954855842185129e-05, "loss": 0.412, "num_tokens": 483193624.0, "step": 2533 }, { "epoch": 0.8649940262843488, "grad_norm": 0.20914741433536485, "learning_rate": 3.9542235710672736e-05, "loss": 0.3474, "num_tokens": 483398433.0, "step": 2534 }, { "epoch": 0.8653353814644137, "grad_norm": 0.25642874634228546, "learning_rate": 3.953591299949418e-05, "loss": 0.4066, "num_tokens": 483623996.0, "step": 2535 }, { "epoch": 0.8656767366444785, "grad_norm": 0.22345178971416374, "learning_rate": 3.952959028831563e-05, "loss": 0.3694, "num_tokens": 483813235.0, "step": 2536 }, { "epoch": 0.8660180918245435, "grad_norm": 0.21931306097760353, "learning_rate": 3.952326757713708e-05, "loss": 0.373, "num_tokens": 484018296.0, "step": 2537 }, { "epoch": 0.8663594470046083, "grad_norm": 0.24407665440934317, "learning_rate": 3.9516944865958524e-05, "loss": 0.4065, "num_tokens": 484196268.0, "step": 2538 }, { "epoch": 0.8667008021846732, "grad_norm": 0.21733458767311895, "learning_rate": 3.951062215477997e-05, "loss": 0.3742, "num_tokens": 484377102.0, "step": 2539 }, { "epoch": 0.867042157364738, "grad_norm": 0.22188654861376633, "learning_rate": 3.950429944360142e-05, "loss": 0.3804, "num_tokens": 484560505.0, "step": 2540 }, { "epoch": 0.8673835125448028, "grad_norm": 0.2143821324922214, "learning_rate": 3.9497976732422865e-05, "loss": 0.3706, "num_tokens": 484757683.0, "step": 2541 }, { "epoch": 0.8677248677248677, "grad_norm": 0.20985337227755163, "learning_rate": 3.949165402124431e-05, "loss": 0.366, "num_tokens": 484951874.0, "step": 2542 }, { "epoch": 0.8680662229049326, "grad_norm": 0.26391902990582766, "learning_rate": 3.948533131006576e-05, "loss": 0.3834, "num_tokens": 485113741.0, "step": 2543 }, { "epoch": 0.8684075780849975, "grad_norm": 0.22174856329758225, "learning_rate": 3.9479008598887206e-05, "loss": 0.3485, "num_tokens": 485329507.0, "step": 2544 }, { "epoch": 0.8687489332650623, "grad_norm": 0.20295307069667004, "learning_rate": 3.9472685887708646e-05, "loss": 0.3737, "num_tokens": 485554859.0, "step": 2545 }, { "epoch": 0.8690902884451271, "grad_norm": 0.22113942502777473, "learning_rate": 3.9466363176530093e-05, "loss": 0.3444, "num_tokens": 485729673.0, "step": 2546 }, { "epoch": 0.869431643625192, "grad_norm": 0.22384371020464522, "learning_rate": 3.946004046535155e-05, "loss": 0.3805, "num_tokens": 485921837.0, "step": 2547 }, { "epoch": 0.8697729988052568, "grad_norm": 0.21826304949660907, "learning_rate": 3.9453717754172994e-05, "loss": 0.3727, "num_tokens": 486140274.0, "step": 2548 }, { "epoch": 0.8701143539853218, "grad_norm": 0.24618692329144182, "learning_rate": 3.944739504299444e-05, "loss": 0.3743, "num_tokens": 486318696.0, "step": 2549 }, { "epoch": 0.8704557091653866, "grad_norm": 0.20925524987027413, "learning_rate": 3.944107233181589e-05, "loss": 0.3445, "num_tokens": 486505378.0, "step": 2550 }, { "epoch": 0.8707970643454515, "grad_norm": 0.3092520749142711, "learning_rate": 3.943474962063733e-05, "loss": 0.3803, "num_tokens": 486685983.0, "step": 2551 }, { "epoch": 0.8711384195255163, "grad_norm": 0.25237440515503, "learning_rate": 3.9428426909458775e-05, "loss": 0.3931, "num_tokens": 486841801.0, "step": 2552 }, { "epoch": 0.8714797747055811, "grad_norm": 0.2935535544586812, "learning_rate": 3.942210419828022e-05, "loss": 0.3689, "num_tokens": 487013574.0, "step": 2553 }, { "epoch": 0.871821129885646, "grad_norm": 0.2070864726902593, "learning_rate": 3.941578148710167e-05, "loss": 0.3569, "num_tokens": 487242817.0, "step": 2554 }, { "epoch": 0.8721624850657109, "grad_norm": 0.19989983125827968, "learning_rate": 3.9409458775923116e-05, "loss": 0.3394, "num_tokens": 487459952.0, "step": 2555 }, { "epoch": 0.8725038402457758, "grad_norm": 0.24637252653332323, "learning_rate": 3.940313606474457e-05, "loss": 0.3725, "num_tokens": 487662434.0, "step": 2556 }, { "epoch": 0.8728451954258406, "grad_norm": 0.21393742117814618, "learning_rate": 3.939681335356601e-05, "loss": 0.3349, "num_tokens": 487821536.0, "step": 2557 }, { "epoch": 0.8731865506059054, "grad_norm": 0.21118065632502764, "learning_rate": 3.939049064238746e-05, "loss": 0.3576, "num_tokens": 488037529.0, "step": 2558 }, { "epoch": 0.8735279057859703, "grad_norm": 0.21658920616548474, "learning_rate": 3.9384167931208904e-05, "loss": 0.3862, "num_tokens": 488271026.0, "step": 2559 }, { "epoch": 0.8738692609660351, "grad_norm": 0.2267450558322642, "learning_rate": 3.937784522003035e-05, "loss": 0.3549, "num_tokens": 488441664.0, "step": 2560 }, { "epoch": 0.8742106161461001, "grad_norm": 0.24782602624832065, "learning_rate": 3.93715225088518e-05, "loss": 0.3322, "num_tokens": 488607168.0, "step": 2561 }, { "epoch": 0.8745519713261649, "grad_norm": 0.2213470467803673, "learning_rate": 3.9365199797673245e-05, "loss": 0.3564, "num_tokens": 488779116.0, "step": 2562 }, { "epoch": 0.8748933265062298, "grad_norm": 0.26786163865807383, "learning_rate": 3.935887708649469e-05, "loss": 0.3908, "num_tokens": 488960139.0, "step": 2563 }, { "epoch": 0.8752346816862946, "grad_norm": 0.2076469565023268, "learning_rate": 3.935255437531614e-05, "loss": 0.3773, "num_tokens": 489203676.0, "step": 2564 }, { "epoch": 0.8755760368663594, "grad_norm": 0.2340362320306027, "learning_rate": 3.9346231664137586e-05, "loss": 0.3698, "num_tokens": 489404160.0, "step": 2565 }, { "epoch": 0.8759173920464243, "grad_norm": 0.2527132990710158, "learning_rate": 3.933990895295903e-05, "loss": 0.3788, "num_tokens": 489558660.0, "step": 2566 }, { "epoch": 0.8762587472264891, "grad_norm": 0.21010855155618177, "learning_rate": 3.933358624178048e-05, "loss": 0.3291, "num_tokens": 489744303.0, "step": 2567 }, { "epoch": 0.8766001024065541, "grad_norm": 0.2213867466337714, "learning_rate": 3.932726353060193e-05, "loss": 0.3681, "num_tokens": 489947195.0, "step": 2568 }, { "epoch": 0.8769414575866189, "grad_norm": 0.20717000790951012, "learning_rate": 3.932094081942337e-05, "loss": 0.3622, "num_tokens": 490139806.0, "step": 2569 }, { "epoch": 0.8772828127666837, "grad_norm": 0.23137630258858438, "learning_rate": 3.9314618108244814e-05, "loss": 0.3857, "num_tokens": 490353554.0, "step": 2570 }, { "epoch": 0.8776241679467486, "grad_norm": 0.22358106621115922, "learning_rate": 3.930829539706626e-05, "loss": 0.3722, "num_tokens": 490582471.0, "step": 2571 }, { "epoch": 0.8779655231268134, "grad_norm": 0.21517097333170046, "learning_rate": 3.930197268588771e-05, "loss": 0.333, "num_tokens": 490774639.0, "step": 2572 }, { "epoch": 0.8783068783068783, "grad_norm": 0.38517567173234346, "learning_rate": 3.929564997470916e-05, "loss": 0.3513, "num_tokens": 490962833.0, "step": 2573 }, { "epoch": 0.8786482334869432, "grad_norm": 0.23159532612341413, "learning_rate": 3.928932726353061e-05, "loss": 0.3667, "num_tokens": 491157963.0, "step": 2574 }, { "epoch": 0.878989588667008, "grad_norm": 0.20071730083417214, "learning_rate": 3.928300455235205e-05, "loss": 0.3612, "num_tokens": 491374266.0, "step": 2575 }, { "epoch": 0.8793309438470729, "grad_norm": 0.22036226878272083, "learning_rate": 3.9276681841173496e-05, "loss": 0.3691, "num_tokens": 491573109.0, "step": 2576 }, { "epoch": 0.8796722990271377, "grad_norm": 0.22119155317725006, "learning_rate": 3.927035912999494e-05, "loss": 0.3632, "num_tokens": 491741203.0, "step": 2577 }, { "epoch": 0.8800136542072026, "grad_norm": 0.22516366843980481, "learning_rate": 3.926403641881639e-05, "loss": 0.3728, "num_tokens": 491906421.0, "step": 2578 }, { "epoch": 0.8803550093872674, "grad_norm": 0.21398898333721647, "learning_rate": 3.925771370763784e-05, "loss": 0.3692, "num_tokens": 492104927.0, "step": 2579 }, { "epoch": 0.8806963645673324, "grad_norm": 0.24001122517641585, "learning_rate": 3.9251390996459284e-05, "loss": 0.38, "num_tokens": 492267194.0, "step": 2580 }, { "epoch": 0.8810377197473972, "grad_norm": 0.21087537468024267, "learning_rate": 3.924506828528073e-05, "loss": 0.422, "num_tokens": 492487050.0, "step": 2581 }, { "epoch": 0.881379074927462, "grad_norm": 0.20221777028308555, "learning_rate": 3.923874557410218e-05, "loss": 0.3674, "num_tokens": 492697357.0, "step": 2582 }, { "epoch": 0.8817204301075269, "grad_norm": 0.2060629940696458, "learning_rate": 3.9232422862923625e-05, "loss": 0.3694, "num_tokens": 492940400.0, "step": 2583 }, { "epoch": 0.8820617852875917, "grad_norm": 0.22098503486996812, "learning_rate": 3.922610015174507e-05, "loss": 0.3337, "num_tokens": 493100440.0, "step": 2584 }, { "epoch": 0.8824031404676566, "grad_norm": 0.2148443828487524, "learning_rate": 3.921977744056652e-05, "loss": 0.3679, "num_tokens": 493316543.0, "step": 2585 }, { "epoch": 0.8827444956477215, "grad_norm": 0.23644007249304344, "learning_rate": 3.9213454729387966e-05, "loss": 0.3994, "num_tokens": 493497329.0, "step": 2586 }, { "epoch": 0.8830858508277863, "grad_norm": 0.23434947616552307, "learning_rate": 3.9207132018209406e-05, "loss": 0.4179, "num_tokens": 493725967.0, "step": 2587 }, { "epoch": 0.8834272060078512, "grad_norm": 0.26866718755621005, "learning_rate": 3.920080930703085e-05, "loss": 0.3636, "num_tokens": 493894493.0, "step": 2588 }, { "epoch": 0.883768561187916, "grad_norm": 0.22141938524252086, "learning_rate": 3.91944865958523e-05, "loss": 0.387, "num_tokens": 494109834.0, "step": 2589 }, { "epoch": 0.8841099163679809, "grad_norm": 0.21200462404083337, "learning_rate": 3.9188163884673754e-05, "loss": 0.3965, "num_tokens": 494355897.0, "step": 2590 }, { "epoch": 0.8844512715480457, "grad_norm": 0.20333963574466726, "learning_rate": 3.91818411734952e-05, "loss": 0.3575, "num_tokens": 494567150.0, "step": 2591 }, { "epoch": 0.8847926267281107, "grad_norm": 0.24237763289685543, "learning_rate": 3.917551846231665e-05, "loss": 0.3803, "num_tokens": 494751873.0, "step": 2592 }, { "epoch": 0.8851339819081755, "grad_norm": 0.22963761016439896, "learning_rate": 3.916919575113809e-05, "loss": 0.3363, "num_tokens": 494912618.0, "step": 2593 }, { "epoch": 0.8854753370882403, "grad_norm": 0.36468750526038013, "learning_rate": 3.9162873039959535e-05, "loss": 0.3594, "num_tokens": 495093654.0, "step": 2594 }, { "epoch": 0.8858166922683052, "grad_norm": 0.22074114426096828, "learning_rate": 3.915655032878098e-05, "loss": 0.3589, "num_tokens": 495330487.0, "step": 2595 }, { "epoch": 0.88615804744837, "grad_norm": 0.22381656208695547, "learning_rate": 3.915022761760243e-05, "loss": 0.3432, "num_tokens": 495523069.0, "step": 2596 }, { "epoch": 0.8864994026284349, "grad_norm": 0.23230720181267026, "learning_rate": 3.9143904906423876e-05, "loss": 0.3666, "num_tokens": 495691577.0, "step": 2597 }, { "epoch": 0.8868407578084997, "grad_norm": 0.206032461602472, "learning_rate": 3.913758219524532e-05, "loss": 0.3402, "num_tokens": 495914439.0, "step": 2598 }, { "epoch": 0.8871821129885646, "grad_norm": 0.21809259816072785, "learning_rate": 3.913125948406677e-05, "loss": 0.4172, "num_tokens": 496124084.0, "step": 2599 }, { "epoch": 0.8875234681686295, "grad_norm": 0.2360078396413248, "learning_rate": 3.912493677288822e-05, "loss": 0.3592, "num_tokens": 496278427.0, "step": 2600 }, { "epoch": 0.8878648233486943, "grad_norm": 0.21187709577405012, "learning_rate": 3.9118614061709664e-05, "loss": 0.3659, "num_tokens": 496482032.0, "step": 2601 }, { "epoch": 0.8882061785287592, "grad_norm": 0.2156469157297291, "learning_rate": 3.911229135053111e-05, "loss": 0.3217, "num_tokens": 496636966.0, "step": 2602 }, { "epoch": 0.888547533708824, "grad_norm": 0.22649671571531638, "learning_rate": 3.910596863935256e-05, "loss": 0.4039, "num_tokens": 496841554.0, "step": 2603 }, { "epoch": 0.8888888888888888, "grad_norm": 0.2231966939042725, "learning_rate": 3.9099645928174e-05, "loss": 0.3719, "num_tokens": 497056883.0, "step": 2604 }, { "epoch": 0.8892302440689538, "grad_norm": 0.23071996182496105, "learning_rate": 3.9093323216995445e-05, "loss": 0.4005, "num_tokens": 497243081.0, "step": 2605 }, { "epoch": 0.8895715992490186, "grad_norm": 0.20345321679084072, "learning_rate": 3.908700050581689e-05, "loss": 0.3342, "num_tokens": 497443909.0, "step": 2606 }, { "epoch": 0.8899129544290835, "grad_norm": 0.22810249991355588, "learning_rate": 3.9080677794638346e-05, "loss": 0.3821, "num_tokens": 497658244.0, "step": 2607 }, { "epoch": 0.8902543096091483, "grad_norm": 0.22090929210029187, "learning_rate": 3.907435508345979e-05, "loss": 0.371, "num_tokens": 497860138.0, "step": 2608 }, { "epoch": 0.8905956647892131, "grad_norm": 0.21535994624400598, "learning_rate": 3.906803237228124e-05, "loss": 0.3732, "num_tokens": 498058480.0, "step": 2609 }, { "epoch": 0.890937019969278, "grad_norm": 0.22332934317068961, "learning_rate": 3.906170966110268e-05, "loss": 0.3501, "num_tokens": 498261003.0, "step": 2610 }, { "epoch": 0.8912783751493429, "grad_norm": 0.2059556851474015, "learning_rate": 3.905538694992413e-05, "loss": 0.3577, "num_tokens": 498483432.0, "step": 2611 }, { "epoch": 0.8916197303294078, "grad_norm": 0.21920793602443664, "learning_rate": 3.9049064238745574e-05, "loss": 0.3859, "num_tokens": 498685259.0, "step": 2612 }, { "epoch": 0.8919610855094726, "grad_norm": 0.21373444425097918, "learning_rate": 3.904274152756702e-05, "loss": 0.376, "num_tokens": 498879004.0, "step": 2613 }, { "epoch": 0.8923024406895375, "grad_norm": 0.23034549350373543, "learning_rate": 3.903641881638847e-05, "loss": 0.3545, "num_tokens": 499056308.0, "step": 2614 }, { "epoch": 0.8926437958696023, "grad_norm": 0.21167016558846086, "learning_rate": 3.9030096105209915e-05, "loss": 0.3756, "num_tokens": 499246663.0, "step": 2615 }, { "epoch": 0.8929851510496671, "grad_norm": 0.23924682247435883, "learning_rate": 3.902377339403136e-05, "loss": 0.3649, "num_tokens": 499455189.0, "step": 2616 }, { "epoch": 0.8933265062297321, "grad_norm": 0.22536383511266797, "learning_rate": 3.901745068285281e-05, "loss": 0.3839, "num_tokens": 499665118.0, "step": 2617 }, { "epoch": 0.8936678614097969, "grad_norm": 0.20673112234132637, "learning_rate": 3.9011127971674256e-05, "loss": 0.3599, "num_tokens": 499873248.0, "step": 2618 }, { "epoch": 0.8940092165898618, "grad_norm": 0.21824197314925015, "learning_rate": 3.90048052604957e-05, "loss": 0.376, "num_tokens": 500064851.0, "step": 2619 }, { "epoch": 0.8943505717699266, "grad_norm": 0.22473510039892128, "learning_rate": 3.899848254931715e-05, "loss": 0.3405, "num_tokens": 500231574.0, "step": 2620 }, { "epoch": 0.8946919269499914, "grad_norm": 0.23105513943002431, "learning_rate": 3.89921598381386e-05, "loss": 0.3948, "num_tokens": 500437244.0, "step": 2621 }, { "epoch": 0.8950332821300563, "grad_norm": 0.20670001253886813, "learning_rate": 3.898583712696004e-05, "loss": 0.366, "num_tokens": 500688841.0, "step": 2622 }, { "epoch": 0.8953746373101212, "grad_norm": 0.2097195207654196, "learning_rate": 3.8979514415781484e-05, "loss": 0.3672, "num_tokens": 500894316.0, "step": 2623 }, { "epoch": 0.8957159924901861, "grad_norm": 0.22105612140681385, "learning_rate": 3.897319170460294e-05, "loss": 0.3867, "num_tokens": 501090674.0, "step": 2624 }, { "epoch": 0.8960573476702509, "grad_norm": 0.21744050197169001, "learning_rate": 3.8966868993424385e-05, "loss": 0.3461, "num_tokens": 501255769.0, "step": 2625 }, { "epoch": 0.8963987028503158, "grad_norm": 0.2342298246567359, "learning_rate": 3.896054628224583e-05, "loss": 0.3787, "num_tokens": 501452987.0, "step": 2626 }, { "epoch": 0.8967400580303806, "grad_norm": 0.2387882933306549, "learning_rate": 3.895422357106728e-05, "loss": 0.3856, "num_tokens": 501637941.0, "step": 2627 }, { "epoch": 0.8970814132104454, "grad_norm": 0.22020406129600276, "learning_rate": 3.894790085988872e-05, "loss": 0.3698, "num_tokens": 501832864.0, "step": 2628 }, { "epoch": 0.8974227683905103, "grad_norm": 0.24043710474924765, "learning_rate": 3.8941578148710166e-05, "loss": 0.3585, "num_tokens": 502056923.0, "step": 2629 }, { "epoch": 0.8977641235705752, "grad_norm": 0.23730024257978932, "learning_rate": 3.893525543753161e-05, "loss": 0.3907, "num_tokens": 502240204.0, "step": 2630 }, { "epoch": 0.8981054787506401, "grad_norm": 0.20639202580937197, "learning_rate": 3.892893272635306e-05, "loss": 0.3508, "num_tokens": 502465632.0, "step": 2631 }, { "epoch": 0.8984468339307049, "grad_norm": 0.20702293858063767, "learning_rate": 3.892261001517451e-05, "loss": 0.3469, "num_tokens": 502688666.0, "step": 2632 }, { "epoch": 0.8987881891107697, "grad_norm": 0.24764667505247806, "learning_rate": 3.891628730399596e-05, "loss": 0.3901, "num_tokens": 502872590.0, "step": 2633 }, { "epoch": 0.8991295442908346, "grad_norm": 0.22853288279386852, "learning_rate": 3.89099645928174e-05, "loss": 0.361, "num_tokens": 503047378.0, "step": 2634 }, { "epoch": 0.8994708994708994, "grad_norm": 0.22076340675373482, "learning_rate": 3.890364188163885e-05, "loss": 0.3671, "num_tokens": 503227184.0, "step": 2635 }, { "epoch": 0.8998122546509644, "grad_norm": 0.21786564737715833, "learning_rate": 3.8897319170460295e-05, "loss": 0.3595, "num_tokens": 503424639.0, "step": 2636 }, { "epoch": 0.9001536098310292, "grad_norm": 0.23578769455983375, "learning_rate": 3.889099645928174e-05, "loss": 0.3545, "num_tokens": 503605919.0, "step": 2637 }, { "epoch": 0.900494965011094, "grad_norm": 0.22017335769974294, "learning_rate": 3.888467374810319e-05, "loss": 0.3714, "num_tokens": 503790525.0, "step": 2638 }, { "epoch": 0.9008363201911589, "grad_norm": 0.21054068293454134, "learning_rate": 3.8878351036924636e-05, "loss": 0.3557, "num_tokens": 503954837.0, "step": 2639 }, { "epoch": 0.9011776753712237, "grad_norm": 0.22087056437137753, "learning_rate": 3.887202832574608e-05, "loss": 0.3714, "num_tokens": 504171179.0, "step": 2640 }, { "epoch": 0.9015190305512886, "grad_norm": 0.22546844933840843, "learning_rate": 3.886570561456753e-05, "loss": 0.3707, "num_tokens": 504331276.0, "step": 2641 }, { "epoch": 0.9018603857313535, "grad_norm": 0.20425968326422253, "learning_rate": 3.885938290338898e-05, "loss": 0.3418, "num_tokens": 504551972.0, "step": 2642 }, { "epoch": 0.9022017409114184, "grad_norm": 0.22091585494844346, "learning_rate": 3.8853060192210424e-05, "loss": 0.4092, "num_tokens": 504793986.0, "step": 2643 }, { "epoch": 0.9025430960914832, "grad_norm": 0.25929846865337614, "learning_rate": 3.884673748103187e-05, "loss": 0.3318, "num_tokens": 504995762.0, "step": 2644 }, { "epoch": 0.902884451271548, "grad_norm": 0.24336894467196024, "learning_rate": 3.884041476985332e-05, "loss": 0.3923, "num_tokens": 505177203.0, "step": 2645 }, { "epoch": 0.9032258064516129, "grad_norm": 0.23065456561699743, "learning_rate": 3.883409205867476e-05, "loss": 0.3822, "num_tokens": 505367838.0, "step": 2646 }, { "epoch": 0.9035671616316777, "grad_norm": 0.2293019508405011, "learning_rate": 3.8827769347496205e-05, "loss": 0.3491, "num_tokens": 505550990.0, "step": 2647 }, { "epoch": 0.9039085168117427, "grad_norm": 0.2431725891823782, "learning_rate": 3.882144663631765e-05, "loss": 0.3671, "num_tokens": 505732254.0, "step": 2648 }, { "epoch": 0.9042498719918075, "grad_norm": 0.19738792488087745, "learning_rate": 3.88151239251391e-05, "loss": 0.3468, "num_tokens": 505941063.0, "step": 2649 }, { "epoch": 0.9045912271718723, "grad_norm": 0.20678706696696628, "learning_rate": 3.880880121396055e-05, "loss": 0.3444, "num_tokens": 506138307.0, "step": 2650 }, { "epoch": 0.9049325823519372, "grad_norm": 0.24854471013339627, "learning_rate": 3.8802478502782e-05, "loss": 0.3796, "num_tokens": 506325347.0, "step": 2651 }, { "epoch": 0.905273937532002, "grad_norm": 0.2547728795136209, "learning_rate": 3.879615579160344e-05, "loss": 0.3878, "num_tokens": 506497151.0, "step": 2652 }, { "epoch": 0.9056152927120669, "grad_norm": 0.226182352121814, "learning_rate": 3.878983308042489e-05, "loss": 0.387, "num_tokens": 506709633.0, "step": 2653 }, { "epoch": 0.9059566478921318, "grad_norm": 0.21591959613448944, "learning_rate": 3.8783510369246334e-05, "loss": 0.3486, "num_tokens": 506898143.0, "step": 2654 }, { "epoch": 0.9062980030721967, "grad_norm": 0.20395824930188158, "learning_rate": 3.877718765806778e-05, "loss": 0.3496, "num_tokens": 507092028.0, "step": 2655 }, { "epoch": 0.9066393582522615, "grad_norm": 0.24866517660921225, "learning_rate": 3.877086494688923e-05, "loss": 0.363, "num_tokens": 507252897.0, "step": 2656 }, { "epoch": 0.9069807134323263, "grad_norm": 0.22507144422452105, "learning_rate": 3.8764542235710675e-05, "loss": 0.3769, "num_tokens": 507448687.0, "step": 2657 }, { "epoch": 0.9073220686123912, "grad_norm": 0.23126356837142226, "learning_rate": 3.875821952453212e-05, "loss": 0.351, "num_tokens": 507601571.0, "step": 2658 }, { "epoch": 0.907663423792456, "grad_norm": 0.23750347436435956, "learning_rate": 3.875189681335357e-05, "loss": 0.3738, "num_tokens": 507792184.0, "step": 2659 }, { "epoch": 0.9080047789725209, "grad_norm": 0.2341559676218211, "learning_rate": 3.8745574102175016e-05, "loss": 0.3402, "num_tokens": 507961715.0, "step": 2660 }, { "epoch": 0.9083461341525858, "grad_norm": 0.21243366807789588, "learning_rate": 3.873925139099646e-05, "loss": 0.3712, "num_tokens": 508167143.0, "step": 2661 }, { "epoch": 0.9086874893326506, "grad_norm": 0.24738160887421765, "learning_rate": 3.873292867981791e-05, "loss": 0.3447, "num_tokens": 508315588.0, "step": 2662 }, { "epoch": 0.9090288445127155, "grad_norm": 0.23089603156447625, "learning_rate": 3.8726605968639357e-05, "loss": 0.3915, "num_tokens": 508518502.0, "step": 2663 }, { "epoch": 0.9093701996927803, "grad_norm": 0.23355320766950113, "learning_rate": 3.87202832574608e-05, "loss": 0.362, "num_tokens": 508686188.0, "step": 2664 }, { "epoch": 0.9097115548728452, "grad_norm": 0.21301034896107565, "learning_rate": 3.8713960546282244e-05, "loss": 0.3811, "num_tokens": 508922707.0, "step": 2665 }, { "epoch": 0.91005291005291, "grad_norm": 0.21631570213840537, "learning_rate": 3.870763783510369e-05, "loss": 0.3548, "num_tokens": 509121664.0, "step": 2666 }, { "epoch": 0.910394265232975, "grad_norm": 0.24816924140521912, "learning_rate": 3.8701315123925144e-05, "loss": 0.381, "num_tokens": 509280671.0, "step": 2667 }, { "epoch": 0.9107356204130398, "grad_norm": 0.2145396602232451, "learning_rate": 3.869499241274659e-05, "loss": 0.3405, "num_tokens": 509486824.0, "step": 2668 }, { "epoch": 0.9110769755931046, "grad_norm": 0.22010570843336558, "learning_rate": 3.868866970156804e-05, "loss": 0.3539, "num_tokens": 509680133.0, "step": 2669 }, { "epoch": 0.9114183307731695, "grad_norm": 0.2101944097171032, "learning_rate": 3.868234699038948e-05, "loss": 0.3746, "num_tokens": 509902436.0, "step": 2670 }, { "epoch": 0.9117596859532343, "grad_norm": 0.22170966513577403, "learning_rate": 3.8676024279210926e-05, "loss": 0.357, "num_tokens": 510083370.0, "step": 2671 }, { "epoch": 0.9121010411332992, "grad_norm": 0.24979637818826705, "learning_rate": 3.866970156803237e-05, "loss": 0.3765, "num_tokens": 510268041.0, "step": 2672 }, { "epoch": 0.9124423963133641, "grad_norm": 0.21750499334660472, "learning_rate": 3.866337885685382e-05, "loss": 0.3427, "num_tokens": 510463733.0, "step": 2673 }, { "epoch": 0.9127837514934289, "grad_norm": 0.23445724705561408, "learning_rate": 3.8657056145675267e-05, "loss": 0.3466, "num_tokens": 510625941.0, "step": 2674 }, { "epoch": 0.9131251066734938, "grad_norm": 0.21606860879056222, "learning_rate": 3.8650733434496714e-05, "loss": 0.408, "num_tokens": 510863059.0, "step": 2675 }, { "epoch": 0.9134664618535586, "grad_norm": 0.23407380310193746, "learning_rate": 3.864441072331816e-05, "loss": 0.3591, "num_tokens": 511052844.0, "step": 2676 }, { "epoch": 0.9138078170336235, "grad_norm": 0.23522101129746734, "learning_rate": 3.863808801213961e-05, "loss": 0.3711, "num_tokens": 511245684.0, "step": 2677 }, { "epoch": 0.9141491722136883, "grad_norm": 0.23556374269105929, "learning_rate": 3.8631765300961055e-05, "loss": 0.3937, "num_tokens": 511454788.0, "step": 2678 }, { "epoch": 0.9144905273937532, "grad_norm": 0.24117595142218648, "learning_rate": 3.86254425897825e-05, "loss": 0.3558, "num_tokens": 511614571.0, "step": 2679 }, { "epoch": 0.9148318825738181, "grad_norm": 0.2659666596114304, "learning_rate": 3.861911987860395e-05, "loss": 0.4291, "num_tokens": 511831576.0, "step": 2680 }, { "epoch": 0.9151732377538829, "grad_norm": 0.24143856109954023, "learning_rate": 3.8612797167425395e-05, "loss": 0.3507, "num_tokens": 512009019.0, "step": 2681 }, { "epoch": 0.9155145929339478, "grad_norm": 0.22740605802013225, "learning_rate": 3.8606474456246836e-05, "loss": 0.3547, "num_tokens": 512171238.0, "step": 2682 }, { "epoch": 0.9158559481140126, "grad_norm": 0.2550303351845146, "learning_rate": 3.860015174506828e-05, "loss": 0.3891, "num_tokens": 512356356.0, "step": 2683 }, { "epoch": 0.9161973032940774, "grad_norm": 0.23874817373667281, "learning_rate": 3.8593829033889736e-05, "loss": 0.3826, "num_tokens": 512556700.0, "step": 2684 }, { "epoch": 0.9165386584741424, "grad_norm": 0.22074615113934257, "learning_rate": 3.8587506322711183e-05, "loss": 0.3591, "num_tokens": 512747128.0, "step": 2685 }, { "epoch": 0.9168800136542072, "grad_norm": 0.22104360505508427, "learning_rate": 3.858118361153263e-05, "loss": 0.3531, "num_tokens": 512911079.0, "step": 2686 }, { "epoch": 0.9172213688342721, "grad_norm": 0.2244557315069807, "learning_rate": 3.857486090035408e-05, "loss": 0.3331, "num_tokens": 513104721.0, "step": 2687 }, { "epoch": 0.9175627240143369, "grad_norm": 0.2154547387595662, "learning_rate": 3.856853818917552e-05, "loss": 0.3492, "num_tokens": 513309395.0, "step": 2688 }, { "epoch": 0.9179040791944018, "grad_norm": 0.21486861976175936, "learning_rate": 3.8562215477996965e-05, "loss": 0.3596, "num_tokens": 513490488.0, "step": 2689 }, { "epoch": 0.9182454343744666, "grad_norm": 0.2466274874646934, "learning_rate": 3.855589276681841e-05, "loss": 0.3916, "num_tokens": 513676804.0, "step": 2690 }, { "epoch": 0.9185867895545314, "grad_norm": 0.21623401499330136, "learning_rate": 3.854957005563986e-05, "loss": 0.3276, "num_tokens": 513862687.0, "step": 2691 }, { "epoch": 0.9189281447345964, "grad_norm": 0.22276335014196688, "learning_rate": 3.8543247344461305e-05, "loss": 0.366, "num_tokens": 514065025.0, "step": 2692 }, { "epoch": 0.9192694999146612, "grad_norm": 0.21613904486012006, "learning_rate": 3.853692463328276e-05, "loss": 0.3869, "num_tokens": 514259836.0, "step": 2693 }, { "epoch": 0.9196108550947261, "grad_norm": 0.22976735928446512, "learning_rate": 3.85306019221042e-05, "loss": 0.3715, "num_tokens": 514419517.0, "step": 2694 }, { "epoch": 0.9199522102747909, "grad_norm": 0.22327388558356104, "learning_rate": 3.8524279210925646e-05, "loss": 0.3348, "num_tokens": 514604288.0, "step": 2695 }, { "epoch": 0.9202935654548557, "grad_norm": 0.21721159855139519, "learning_rate": 3.8517956499747093e-05, "loss": 0.3569, "num_tokens": 514784164.0, "step": 2696 }, { "epoch": 0.9206349206349206, "grad_norm": 0.22385383537395295, "learning_rate": 3.851163378856854e-05, "loss": 0.4019, "num_tokens": 514995978.0, "step": 2697 }, { "epoch": 0.9209762758149855, "grad_norm": 0.2170464905099808, "learning_rate": 3.850531107738999e-05, "loss": 0.3828, "num_tokens": 515206749.0, "step": 2698 }, { "epoch": 0.9213176309950504, "grad_norm": 0.22943902862535562, "learning_rate": 3.8498988366211434e-05, "loss": 0.3755, "num_tokens": 515372734.0, "step": 2699 }, { "epoch": 0.9216589861751152, "grad_norm": 0.22143644939538298, "learning_rate": 3.8492665655032875e-05, "loss": 0.4024, "num_tokens": 515566454.0, "step": 2700 }, { "epoch": 0.92200034135518, "grad_norm": 0.22130868095171805, "learning_rate": 3.848634294385433e-05, "loss": 0.3414, "num_tokens": 515739481.0, "step": 2701 }, { "epoch": 0.9223416965352449, "grad_norm": 0.2253877794915387, "learning_rate": 3.8480020232675775e-05, "loss": 0.3795, "num_tokens": 515914708.0, "step": 2702 }, { "epoch": 0.9226830517153097, "grad_norm": 0.20276714999296946, "learning_rate": 3.847369752149722e-05, "loss": 0.37, "num_tokens": 516145144.0, "step": 2703 }, { "epoch": 0.9230244068953747, "grad_norm": 0.2171921346066868, "learning_rate": 3.846737481031867e-05, "loss": 0.371, "num_tokens": 516325194.0, "step": 2704 }, { "epoch": 0.9233657620754395, "grad_norm": 0.21680178450360663, "learning_rate": 3.8461052099140116e-05, "loss": 0.3844, "num_tokens": 516522052.0, "step": 2705 }, { "epoch": 0.9237071172555044, "grad_norm": 0.21469223705093943, "learning_rate": 3.8454729387961556e-05, "loss": 0.368, "num_tokens": 516724656.0, "step": 2706 }, { "epoch": 0.9240484724355692, "grad_norm": 0.2809962487522932, "learning_rate": 3.8448406676783003e-05, "loss": 0.4482, "num_tokens": 516888641.0, "step": 2707 }, { "epoch": 0.924389827615634, "grad_norm": 0.2189439623107301, "learning_rate": 3.844208396560445e-05, "loss": 0.3615, "num_tokens": 517073888.0, "step": 2708 }, { "epoch": 0.9247311827956989, "grad_norm": 0.20612261838089535, "learning_rate": 3.84357612544259e-05, "loss": 0.3678, "num_tokens": 517282155.0, "step": 2709 }, { "epoch": 0.9250725379757638, "grad_norm": 0.21520060167990943, "learning_rate": 3.842943854324735e-05, "loss": 0.381, "num_tokens": 517486198.0, "step": 2710 }, { "epoch": 0.9254138931558287, "grad_norm": 0.21592455736511143, "learning_rate": 3.84231158320688e-05, "loss": 0.3811, "num_tokens": 517678826.0, "step": 2711 }, { "epoch": 0.9257552483358935, "grad_norm": 0.27477181567993625, "learning_rate": 3.841679312089024e-05, "loss": 0.3685, "num_tokens": 517843381.0, "step": 2712 }, { "epoch": 0.9260966035159583, "grad_norm": 0.20056079305016902, "learning_rate": 3.8410470409711685e-05, "loss": 0.3418, "num_tokens": 518036254.0, "step": 2713 }, { "epoch": 0.9264379586960232, "grad_norm": 0.24187336154917086, "learning_rate": 3.840414769853313e-05, "loss": 0.3823, "num_tokens": 518215513.0, "step": 2714 }, { "epoch": 0.926779313876088, "grad_norm": 0.21131907646022544, "learning_rate": 3.839782498735458e-05, "loss": 0.3494, "num_tokens": 518390045.0, "step": 2715 }, { "epoch": 0.927120669056153, "grad_norm": 0.2249242910693754, "learning_rate": 3.8391502276176026e-05, "loss": 0.3592, "num_tokens": 518559570.0, "step": 2716 }, { "epoch": 0.9274620242362178, "grad_norm": 0.2619948648845189, "learning_rate": 3.838517956499747e-05, "loss": 0.3708, "num_tokens": 518714191.0, "step": 2717 }, { "epoch": 0.9278033794162827, "grad_norm": 0.2338734178035673, "learning_rate": 3.837885685381892e-05, "loss": 0.3881, "num_tokens": 518927068.0, "step": 2718 }, { "epoch": 0.9281447345963475, "grad_norm": 0.22706811573307176, "learning_rate": 3.837253414264037e-05, "loss": 0.4062, "num_tokens": 519130049.0, "step": 2719 }, { "epoch": 0.9284860897764123, "grad_norm": 0.2270976969320413, "learning_rate": 3.8366211431461814e-05, "loss": 0.3588, "num_tokens": 519307999.0, "step": 2720 }, { "epoch": 0.9288274449564772, "grad_norm": 0.222323067329852, "learning_rate": 3.835988872028326e-05, "loss": 0.3518, "num_tokens": 519483463.0, "step": 2721 }, { "epoch": 0.929168800136542, "grad_norm": 0.2071727756968163, "learning_rate": 3.835356600910471e-05, "loss": 0.3508, "num_tokens": 519659867.0, "step": 2722 }, { "epoch": 0.929510155316607, "grad_norm": 0.2307903292037166, "learning_rate": 3.8347243297926155e-05, "loss": 0.3788, "num_tokens": 519838088.0, "step": 2723 }, { "epoch": 0.9298515104966718, "grad_norm": 0.22449770985881454, "learning_rate": 3.8340920586747595e-05, "loss": 0.3474, "num_tokens": 520022391.0, "step": 2724 }, { "epoch": 0.9301928656767366, "grad_norm": 0.2602866986609461, "learning_rate": 3.833459787556904e-05, "loss": 0.3621, "num_tokens": 520188047.0, "step": 2725 }, { "epoch": 0.9305342208568015, "grad_norm": 0.21754569648434716, "learning_rate": 3.832827516439049e-05, "loss": 0.3506, "num_tokens": 520379705.0, "step": 2726 }, { "epoch": 0.9308755760368663, "grad_norm": 0.23810184127929798, "learning_rate": 3.832195245321194e-05, "loss": 0.3929, "num_tokens": 520589476.0, "step": 2727 }, { "epoch": 0.9312169312169312, "grad_norm": 0.239946825844079, "learning_rate": 3.831562974203339e-05, "loss": 0.3649, "num_tokens": 520779009.0, "step": 2728 }, { "epoch": 0.9315582863969961, "grad_norm": 0.2263471956909442, "learning_rate": 3.830930703085483e-05, "loss": 0.3548, "num_tokens": 520983214.0, "step": 2729 }, { "epoch": 0.931899641577061, "grad_norm": 0.21522844749011233, "learning_rate": 3.830298431967628e-05, "loss": 0.351, "num_tokens": 521162676.0, "step": 2730 }, { "epoch": 0.9322409967571258, "grad_norm": 0.396647396110867, "learning_rate": 3.8296661608497724e-05, "loss": 0.3563, "num_tokens": 521343511.0, "step": 2731 }, { "epoch": 0.9325823519371906, "grad_norm": 0.22449663241823845, "learning_rate": 3.829033889731917e-05, "loss": 0.3746, "num_tokens": 521533019.0, "step": 2732 }, { "epoch": 0.9329237071172555, "grad_norm": 0.2355788501462454, "learning_rate": 3.828401618614062e-05, "loss": 0.3576, "num_tokens": 521724459.0, "step": 2733 }, { "epoch": 0.9332650622973203, "grad_norm": 0.22335855460471185, "learning_rate": 3.8277693474962065e-05, "loss": 0.3753, "num_tokens": 521927616.0, "step": 2734 }, { "epoch": 0.9336064174773853, "grad_norm": 0.2142168471589911, "learning_rate": 3.827137076378351e-05, "loss": 0.3233, "num_tokens": 522129308.0, "step": 2735 }, { "epoch": 0.9339477726574501, "grad_norm": 0.2321503935489579, "learning_rate": 3.826504805260496e-05, "loss": 0.3381, "num_tokens": 522308382.0, "step": 2736 }, { "epoch": 0.9342891278375149, "grad_norm": 0.21212917509468068, "learning_rate": 3.8258725341426406e-05, "loss": 0.3491, "num_tokens": 522520820.0, "step": 2737 }, { "epoch": 0.9346304830175798, "grad_norm": 0.20905213655517377, "learning_rate": 3.825240263024785e-05, "loss": 0.3728, "num_tokens": 522723861.0, "step": 2738 }, { "epoch": 0.9349718381976446, "grad_norm": 0.21967352635241325, "learning_rate": 3.82460799190693e-05, "loss": 0.3891, "num_tokens": 522934174.0, "step": 2739 }, { "epoch": 0.9353131933777095, "grad_norm": 0.2599881317307799, "learning_rate": 3.823975720789075e-05, "loss": 0.4149, "num_tokens": 523131069.0, "step": 2740 }, { "epoch": 0.9356545485577744, "grad_norm": 0.21131544882576872, "learning_rate": 3.823343449671219e-05, "loss": 0.356, "num_tokens": 523345869.0, "step": 2741 }, { "epoch": 0.9359959037378393, "grad_norm": 0.36985554872145326, "learning_rate": 3.8227111785533634e-05, "loss": 0.3735, "num_tokens": 523523895.0, "step": 2742 }, { "epoch": 0.9363372589179041, "grad_norm": 0.2651358126536886, "learning_rate": 3.822078907435508e-05, "loss": 0.3782, "num_tokens": 523693596.0, "step": 2743 }, { "epoch": 0.9366786140979689, "grad_norm": 0.22689125840674126, "learning_rate": 3.8214466363176535e-05, "loss": 0.362, "num_tokens": 523871971.0, "step": 2744 }, { "epoch": 0.9370199692780338, "grad_norm": 0.23941169407131516, "learning_rate": 3.820814365199798e-05, "loss": 0.3713, "num_tokens": 524037555.0, "step": 2745 }, { "epoch": 0.9373613244580986, "grad_norm": 0.23448085732208268, "learning_rate": 3.820182094081943e-05, "loss": 0.3869, "num_tokens": 524232903.0, "step": 2746 }, { "epoch": 0.9377026796381636, "grad_norm": 0.2678060653971704, "learning_rate": 3.819549822964087e-05, "loss": 0.4121, "num_tokens": 524494535.0, "step": 2747 }, { "epoch": 0.9380440348182284, "grad_norm": 0.22214369694142133, "learning_rate": 3.8189175518462316e-05, "loss": 0.3807, "num_tokens": 524697777.0, "step": 2748 }, { "epoch": 0.9383853899982932, "grad_norm": 0.19525384113266783, "learning_rate": 3.818285280728376e-05, "loss": 0.3267, "num_tokens": 524875420.0, "step": 2749 }, { "epoch": 0.9387267451783581, "grad_norm": 0.22061480240922493, "learning_rate": 3.817653009610521e-05, "loss": 0.3625, "num_tokens": 525088223.0, "step": 2750 }, { "epoch": 0.9390681003584229, "grad_norm": 0.24221306964702533, "learning_rate": 3.817020738492666e-05, "loss": 0.3678, "num_tokens": 525293496.0, "step": 2751 }, { "epoch": 0.9394094555384878, "grad_norm": 0.23780175937105974, "learning_rate": 3.8163884673748104e-05, "loss": 0.4044, "num_tokens": 525512060.0, "step": 2752 }, { "epoch": 0.9397508107185527, "grad_norm": 0.24739130725628075, "learning_rate": 3.815756196256955e-05, "loss": 0.3669, "num_tokens": 525667512.0, "step": 2753 }, { "epoch": 0.9400921658986175, "grad_norm": 0.21132499110544897, "learning_rate": 3.8151239251391e-05, "loss": 0.3544, "num_tokens": 525893223.0, "step": 2754 }, { "epoch": 0.9404335210786824, "grad_norm": 0.23820144435685373, "learning_rate": 3.8144916540212445e-05, "loss": 0.3981, "num_tokens": 526102760.0, "step": 2755 }, { "epoch": 0.9407748762587472, "grad_norm": 0.21636127755094908, "learning_rate": 3.813859382903389e-05, "loss": 0.3521, "num_tokens": 526282865.0, "step": 2756 }, { "epoch": 0.9411162314388121, "grad_norm": 0.21873317074897491, "learning_rate": 3.813227111785534e-05, "loss": 0.3794, "num_tokens": 526473676.0, "step": 2757 }, { "epoch": 0.9414575866188769, "grad_norm": 0.22700444807798165, "learning_rate": 3.8125948406676786e-05, "loss": 0.3544, "num_tokens": 526656276.0, "step": 2758 }, { "epoch": 0.9417989417989417, "grad_norm": 0.22793234930338963, "learning_rate": 3.8119625695498226e-05, "loss": 0.3886, "num_tokens": 526827028.0, "step": 2759 }, { "epoch": 0.9421402969790067, "grad_norm": 0.22344899100559626, "learning_rate": 3.811330298431967e-05, "loss": 0.386, "num_tokens": 527009635.0, "step": 2760 }, { "epoch": 0.9424816521590715, "grad_norm": 0.2152407680159212, "learning_rate": 3.810698027314113e-05, "loss": 0.3684, "num_tokens": 527216123.0, "step": 2761 }, { "epoch": 0.9428230073391364, "grad_norm": 0.20642988769626236, "learning_rate": 3.8100657561962574e-05, "loss": 0.3667, "num_tokens": 527435004.0, "step": 2762 }, { "epoch": 0.9431643625192012, "grad_norm": 0.20087480713024566, "learning_rate": 3.809433485078402e-05, "loss": 0.3466, "num_tokens": 527643660.0, "step": 2763 }, { "epoch": 0.943505717699266, "grad_norm": 0.20853695908864875, "learning_rate": 3.808801213960547e-05, "loss": 0.3476, "num_tokens": 527836899.0, "step": 2764 }, { "epoch": 0.9438470728793309, "grad_norm": 0.22656934851516772, "learning_rate": 3.808168942842691e-05, "loss": 0.3933, "num_tokens": 528052777.0, "step": 2765 }, { "epoch": 0.9441884280593958, "grad_norm": 0.22975441993789217, "learning_rate": 3.8075366717248355e-05, "loss": 0.4079, "num_tokens": 528241730.0, "step": 2766 }, { "epoch": 0.9445297832394607, "grad_norm": 0.2306026255475665, "learning_rate": 3.80690440060698e-05, "loss": 0.3674, "num_tokens": 528433612.0, "step": 2767 }, { "epoch": 0.9448711384195255, "grad_norm": 0.23418866940746585, "learning_rate": 3.806272129489125e-05, "loss": 0.372, "num_tokens": 528613609.0, "step": 2768 }, { "epoch": 0.9452124935995904, "grad_norm": 0.2783094278762465, "learning_rate": 3.8056398583712696e-05, "loss": 0.3993, "num_tokens": 528802119.0, "step": 2769 }, { "epoch": 0.9455538487796552, "grad_norm": 0.22369408240273955, "learning_rate": 3.805007587253415e-05, "loss": 0.3332, "num_tokens": 528961469.0, "step": 2770 }, { "epoch": 0.94589520395972, "grad_norm": 0.21103971057290455, "learning_rate": 3.804375316135559e-05, "loss": 0.3563, "num_tokens": 529155423.0, "step": 2771 }, { "epoch": 0.946236559139785, "grad_norm": 0.22112127070712823, "learning_rate": 3.803743045017704e-05, "loss": 0.3713, "num_tokens": 529355262.0, "step": 2772 }, { "epoch": 0.9465779143198498, "grad_norm": 0.2686322851878094, "learning_rate": 3.8031107738998484e-05, "loss": 0.3809, "num_tokens": 529523658.0, "step": 2773 }, { "epoch": 0.9469192694999147, "grad_norm": 0.21770930863759624, "learning_rate": 3.802478502781993e-05, "loss": 0.3205, "num_tokens": 529686337.0, "step": 2774 }, { "epoch": 0.9472606246799795, "grad_norm": 0.22557408480881605, "learning_rate": 3.801846231664138e-05, "loss": 0.3718, "num_tokens": 529882649.0, "step": 2775 }, { "epoch": 0.9476019798600444, "grad_norm": 0.2418813196292411, "learning_rate": 3.8012139605462825e-05, "loss": 0.423, "num_tokens": 530093428.0, "step": 2776 }, { "epoch": 0.9479433350401092, "grad_norm": 0.22193775457608925, "learning_rate": 3.8005816894284265e-05, "loss": 0.3555, "num_tokens": 530271623.0, "step": 2777 }, { "epoch": 0.9482846902201741, "grad_norm": 0.22629056069541797, "learning_rate": 3.799949418310572e-05, "loss": 0.3452, "num_tokens": 530433623.0, "step": 2778 }, { "epoch": 0.948626045400239, "grad_norm": 0.22454072103131126, "learning_rate": 3.7993171471927166e-05, "loss": 0.3923, "num_tokens": 530604818.0, "step": 2779 }, { "epoch": 0.9489674005803038, "grad_norm": 0.21698478125031007, "learning_rate": 3.798684876074861e-05, "loss": 0.3629, "num_tokens": 530802847.0, "step": 2780 }, { "epoch": 0.9493087557603687, "grad_norm": 0.23588650220777138, "learning_rate": 3.798052604957006e-05, "loss": 0.3388, "num_tokens": 530970998.0, "step": 2781 }, { "epoch": 0.9496501109404335, "grad_norm": 0.22138433578389197, "learning_rate": 3.797420333839151e-05, "loss": 0.3885, "num_tokens": 531178108.0, "step": 2782 }, { "epoch": 0.9499914661204983, "grad_norm": 0.22809436857919976, "learning_rate": 3.796788062721295e-05, "loss": 0.367, "num_tokens": 531363361.0, "step": 2783 }, { "epoch": 0.9503328213005633, "grad_norm": 0.2279091219245756, "learning_rate": 3.7961557916034394e-05, "loss": 0.3758, "num_tokens": 531555068.0, "step": 2784 }, { "epoch": 0.9506741764806281, "grad_norm": 0.2641665645855961, "learning_rate": 3.795523520485584e-05, "loss": 0.3965, "num_tokens": 531782136.0, "step": 2785 }, { "epoch": 0.951015531660693, "grad_norm": 0.24156109242056942, "learning_rate": 3.794891249367729e-05, "loss": 0.4023, "num_tokens": 531936601.0, "step": 2786 }, { "epoch": 0.9513568868407578, "grad_norm": 0.23015333141893782, "learning_rate": 3.794258978249874e-05, "loss": 0.3859, "num_tokens": 532104117.0, "step": 2787 }, { "epoch": 0.9516982420208226, "grad_norm": 0.20881370116938544, "learning_rate": 3.793626707132019e-05, "loss": 0.3596, "num_tokens": 532312397.0, "step": 2788 }, { "epoch": 0.9520395972008875, "grad_norm": 0.21728881061746516, "learning_rate": 3.792994436014163e-05, "loss": 0.3807, "num_tokens": 532502432.0, "step": 2789 }, { "epoch": 0.9523809523809523, "grad_norm": 0.22209927972431484, "learning_rate": 3.7923621648963076e-05, "loss": 0.3575, "num_tokens": 532695341.0, "step": 2790 }, { "epoch": 0.9527223075610173, "grad_norm": 0.21809167078951625, "learning_rate": 3.791729893778452e-05, "loss": 0.3912, "num_tokens": 532913141.0, "step": 2791 }, { "epoch": 0.9530636627410821, "grad_norm": 0.2215821076715585, "learning_rate": 3.791097622660597e-05, "loss": 0.3468, "num_tokens": 533086111.0, "step": 2792 }, { "epoch": 0.953405017921147, "grad_norm": 0.2546415434725773, "learning_rate": 3.790465351542742e-05, "loss": 0.3582, "num_tokens": 533280003.0, "step": 2793 }, { "epoch": 0.9537463731012118, "grad_norm": 0.23730241338781222, "learning_rate": 3.7898330804248864e-05, "loss": 0.3683, "num_tokens": 533447825.0, "step": 2794 }, { "epoch": 0.9540877282812766, "grad_norm": 0.2121688620807683, "learning_rate": 3.789200809307031e-05, "loss": 0.3778, "num_tokens": 533647864.0, "step": 2795 }, { "epoch": 0.9544290834613415, "grad_norm": 0.2320858574721386, "learning_rate": 3.788568538189176e-05, "loss": 0.3698, "num_tokens": 533832494.0, "step": 2796 }, { "epoch": 0.9547704386414064, "grad_norm": 0.21941091884984365, "learning_rate": 3.7879362670713205e-05, "loss": 0.3539, "num_tokens": 534040628.0, "step": 2797 }, { "epoch": 0.9551117938214713, "grad_norm": 0.22966306787317936, "learning_rate": 3.787303995953465e-05, "loss": 0.3496, "num_tokens": 534206599.0, "step": 2798 }, { "epoch": 0.9554531490015361, "grad_norm": 0.22980493340070093, "learning_rate": 3.78667172483561e-05, "loss": 0.3642, "num_tokens": 534367391.0, "step": 2799 }, { "epoch": 0.955794504181601, "grad_norm": 0.2290788374375667, "learning_rate": 3.7860394537177546e-05, "loss": 0.3616, "num_tokens": 534585862.0, "step": 2800 }, { "epoch": 0.9561358593616658, "grad_norm": 0.18403348269165684, "learning_rate": 3.7854071825998986e-05, "loss": 0.3443, "num_tokens": 534835074.0, "step": 2801 }, { "epoch": 0.9564772145417306, "grad_norm": 0.22630610657911987, "learning_rate": 3.784774911482043e-05, "loss": 0.3293, "num_tokens": 535000051.0, "step": 2802 }, { "epoch": 0.9568185697217956, "grad_norm": 0.2319240120382919, "learning_rate": 3.784142640364188e-05, "loss": 0.3607, "num_tokens": 535175627.0, "step": 2803 }, { "epoch": 0.9571599249018604, "grad_norm": 0.21306431640821, "learning_rate": 3.7835103692463334e-05, "loss": 0.3416, "num_tokens": 535341068.0, "step": 2804 }, { "epoch": 0.9575012800819253, "grad_norm": 0.22521042835343164, "learning_rate": 3.782878098128478e-05, "loss": 0.3796, "num_tokens": 535541915.0, "step": 2805 }, { "epoch": 0.9578426352619901, "grad_norm": 0.23691860742302942, "learning_rate": 3.782245827010623e-05, "loss": 0.363, "num_tokens": 535749650.0, "step": 2806 }, { "epoch": 0.9581839904420549, "grad_norm": 0.21441953779696726, "learning_rate": 3.781613555892767e-05, "loss": 0.3569, "num_tokens": 535946146.0, "step": 2807 }, { "epoch": 0.9585253456221198, "grad_norm": 0.21874900502676195, "learning_rate": 3.7809812847749115e-05, "loss": 0.379, "num_tokens": 536168835.0, "step": 2808 }, { "epoch": 0.9588667008021847, "grad_norm": 0.2433769337621987, "learning_rate": 3.780349013657056e-05, "loss": 0.3687, "num_tokens": 536373871.0, "step": 2809 }, { "epoch": 0.9592080559822496, "grad_norm": 0.2110599709216501, "learning_rate": 3.779716742539201e-05, "loss": 0.3568, "num_tokens": 536581996.0, "step": 2810 }, { "epoch": 0.9595494111623144, "grad_norm": 0.22854314889338626, "learning_rate": 3.7790844714213456e-05, "loss": 0.3545, "num_tokens": 536753477.0, "step": 2811 }, { "epoch": 0.9598907663423792, "grad_norm": 0.21974159947524574, "learning_rate": 3.77845220030349e-05, "loss": 0.3594, "num_tokens": 536961947.0, "step": 2812 }, { "epoch": 0.9602321215224441, "grad_norm": 0.21538311980732006, "learning_rate": 3.777819929185635e-05, "loss": 0.3852, "num_tokens": 537177801.0, "step": 2813 }, { "epoch": 0.9605734767025089, "grad_norm": 0.22089016935155803, "learning_rate": 3.77718765806778e-05, "loss": 0.3704, "num_tokens": 537365411.0, "step": 2814 }, { "epoch": 0.9609148318825739, "grad_norm": 0.22274143315064043, "learning_rate": 3.7765553869499244e-05, "loss": 0.3846, "num_tokens": 537575123.0, "step": 2815 }, { "epoch": 0.9612561870626387, "grad_norm": 0.22684987675396182, "learning_rate": 3.775923115832069e-05, "loss": 0.3603, "num_tokens": 537753779.0, "step": 2816 }, { "epoch": 0.9615975422427036, "grad_norm": 0.2249935458059804, "learning_rate": 3.775290844714214e-05, "loss": 0.3687, "num_tokens": 537940714.0, "step": 2817 }, { "epoch": 0.9619388974227684, "grad_norm": 0.22710089976294798, "learning_rate": 3.7746585735963585e-05, "loss": 0.3488, "num_tokens": 538112281.0, "step": 2818 }, { "epoch": 0.9622802526028332, "grad_norm": 0.22382826315889823, "learning_rate": 3.7740263024785025e-05, "loss": 0.3569, "num_tokens": 538289581.0, "step": 2819 }, { "epoch": 0.9626216077828981, "grad_norm": 0.22860036252265645, "learning_rate": 3.773394031360647e-05, "loss": 0.3394, "num_tokens": 538467636.0, "step": 2820 }, { "epoch": 0.9629629629629629, "grad_norm": 0.22635158174077855, "learning_rate": 3.7727617602427926e-05, "loss": 0.3685, "num_tokens": 538645946.0, "step": 2821 }, { "epoch": 0.9633043181430279, "grad_norm": 0.2063314725486789, "learning_rate": 3.772129489124937e-05, "loss": 0.3861, "num_tokens": 538855381.0, "step": 2822 }, { "epoch": 0.9636456733230927, "grad_norm": 0.20891979584693549, "learning_rate": 3.771497218007082e-05, "loss": 0.3406, "num_tokens": 539031805.0, "step": 2823 }, { "epoch": 0.9639870285031575, "grad_norm": 0.2070948563085365, "learning_rate": 3.7708649468892267e-05, "loss": 0.3854, "num_tokens": 539276269.0, "step": 2824 }, { "epoch": 0.9643283836832224, "grad_norm": 0.2364394905268655, "learning_rate": 3.770232675771371e-05, "loss": 0.3503, "num_tokens": 539428050.0, "step": 2825 }, { "epoch": 0.9646697388632872, "grad_norm": 0.22077954884694298, "learning_rate": 3.7696004046535154e-05, "loss": 0.3652, "num_tokens": 539604261.0, "step": 2826 }, { "epoch": 0.9650110940433521, "grad_norm": 0.23661040122441865, "learning_rate": 3.76896813353566e-05, "loss": 0.3482, "num_tokens": 539734686.0, "step": 2827 }, { "epoch": 0.965352449223417, "grad_norm": 0.22232473491278645, "learning_rate": 3.768335862417805e-05, "loss": 0.3647, "num_tokens": 539920826.0, "step": 2828 }, { "epoch": 0.9656938044034818, "grad_norm": 0.23839284917929984, "learning_rate": 3.7677035912999495e-05, "loss": 0.3934, "num_tokens": 540101375.0, "step": 2829 }, { "epoch": 0.9660351595835467, "grad_norm": 0.22598415208010536, "learning_rate": 3.767071320182095e-05, "loss": 0.367, "num_tokens": 540280417.0, "step": 2830 }, { "epoch": 0.9663765147636115, "grad_norm": 0.22937975681118505, "learning_rate": 3.766439049064239e-05, "loss": 0.375, "num_tokens": 540462222.0, "step": 2831 }, { "epoch": 0.9667178699436764, "grad_norm": 0.25392937663396425, "learning_rate": 3.7658067779463836e-05, "loss": 0.3988, "num_tokens": 540656441.0, "step": 2832 }, { "epoch": 0.9670592251237412, "grad_norm": 0.23308889375347397, "learning_rate": 3.765174506828528e-05, "loss": 0.3942, "num_tokens": 540838487.0, "step": 2833 }, { "epoch": 0.9674005803038062, "grad_norm": 0.2472635698678165, "learning_rate": 3.764542235710673e-05, "loss": 0.3752, "num_tokens": 541018232.0, "step": 2834 }, { "epoch": 0.967741935483871, "grad_norm": 0.2052378832278407, "learning_rate": 3.763909964592818e-05, "loss": 0.3291, "num_tokens": 541212554.0, "step": 2835 }, { "epoch": 0.9680832906639358, "grad_norm": 0.22820833760329667, "learning_rate": 3.7632776934749624e-05, "loss": 0.3576, "num_tokens": 541381002.0, "step": 2836 }, { "epoch": 0.9684246458440007, "grad_norm": 0.22056768911770244, "learning_rate": 3.7626454223571064e-05, "loss": 0.3487, "num_tokens": 541573222.0, "step": 2837 }, { "epoch": 0.9687660010240655, "grad_norm": 0.3682754016186081, "learning_rate": 3.762013151239252e-05, "loss": 0.3612, "num_tokens": 541730938.0, "step": 2838 }, { "epoch": 0.9691073562041304, "grad_norm": 0.21639440175553853, "learning_rate": 3.7613808801213965e-05, "loss": 0.3656, "num_tokens": 541950457.0, "step": 2839 }, { "epoch": 0.9694487113841953, "grad_norm": 0.23117080028165327, "learning_rate": 3.760748609003541e-05, "loss": 0.3834, "num_tokens": 542143008.0, "step": 2840 }, { "epoch": 0.9697900665642601, "grad_norm": 0.24473905835542467, "learning_rate": 3.760116337885686e-05, "loss": 0.3482, "num_tokens": 542315404.0, "step": 2841 }, { "epoch": 0.970131421744325, "grad_norm": 1.2571701467233574, "learning_rate": 3.7594840667678306e-05, "loss": 0.3461, "num_tokens": 542538726.0, "step": 2842 }, { "epoch": 0.9704727769243898, "grad_norm": 0.24876515332453683, "learning_rate": 3.7588517956499746e-05, "loss": 0.3925, "num_tokens": 542736248.0, "step": 2843 }, { "epoch": 0.9708141321044547, "grad_norm": 0.22127490893033416, "learning_rate": 3.758219524532119e-05, "loss": 0.3804, "num_tokens": 542936439.0, "step": 2844 }, { "epoch": 0.9711554872845195, "grad_norm": 0.2369549734011968, "learning_rate": 3.757587253414264e-05, "loss": 0.3957, "num_tokens": 543115108.0, "step": 2845 }, { "epoch": 0.9714968424645845, "grad_norm": 0.2287057591534389, "learning_rate": 3.756954982296409e-05, "loss": 0.4254, "num_tokens": 543357245.0, "step": 2846 }, { "epoch": 0.9718381976446493, "grad_norm": 0.25401271495990657, "learning_rate": 3.756322711178554e-05, "loss": 0.3828, "num_tokens": 543516749.0, "step": 2847 }, { "epoch": 0.9721795528247141, "grad_norm": 0.22786722246794125, "learning_rate": 3.755690440060699e-05, "loss": 0.3817, "num_tokens": 543709483.0, "step": 2848 }, { "epoch": 0.972520908004779, "grad_norm": 0.1982578739469099, "learning_rate": 3.755058168942843e-05, "loss": 0.3764, "num_tokens": 543913091.0, "step": 2849 }, { "epoch": 0.9728622631848438, "grad_norm": 0.2249544712993064, "learning_rate": 3.7544258978249875e-05, "loss": 0.3444, "num_tokens": 544076968.0, "step": 2850 }, { "epoch": 0.9732036183649087, "grad_norm": 0.2241508923179094, "learning_rate": 3.753793626707132e-05, "loss": 0.3485, "num_tokens": 544271005.0, "step": 2851 }, { "epoch": 0.9735449735449735, "grad_norm": 0.2379747943163486, "learning_rate": 3.753161355589277e-05, "loss": 0.3654, "num_tokens": 544445309.0, "step": 2852 }, { "epoch": 0.9738863287250384, "grad_norm": 0.23344324630226768, "learning_rate": 3.7525290844714216e-05, "loss": 0.3965, "num_tokens": 544630452.0, "step": 2853 }, { "epoch": 0.9742276839051033, "grad_norm": 0.21664647009580013, "learning_rate": 3.751896813353566e-05, "loss": 0.3826, "num_tokens": 544840259.0, "step": 2854 }, { "epoch": 0.9745690390851681, "grad_norm": 0.2565922607624552, "learning_rate": 3.751264542235711e-05, "loss": 0.3623, "num_tokens": 544981379.0, "step": 2855 }, { "epoch": 0.974910394265233, "grad_norm": 0.23388095709383916, "learning_rate": 3.7506322711178557e-05, "loss": 0.3311, "num_tokens": 545122320.0, "step": 2856 }, { "epoch": 0.9752517494452978, "grad_norm": 0.2805641449465393, "learning_rate": 3.7500000000000003e-05, "loss": 0.3681, "num_tokens": 545292771.0, "step": 2857 }, { "epoch": 0.9755931046253626, "grad_norm": 0.24771844237206447, "learning_rate": 3.749367728882145e-05, "loss": 0.3631, "num_tokens": 545479028.0, "step": 2858 }, { "epoch": 0.9759344598054276, "grad_norm": 0.2342445747530488, "learning_rate": 3.74873545776429e-05, "loss": 0.4122, "num_tokens": 545706433.0, "step": 2859 }, { "epoch": 0.9762758149854924, "grad_norm": 0.23590712789989052, "learning_rate": 3.748103186646434e-05, "loss": 0.3624, "num_tokens": 545887665.0, "step": 2860 }, { "epoch": 0.9766171701655573, "grad_norm": 0.23624974267330243, "learning_rate": 3.7474709155285785e-05, "loss": 0.3744, "num_tokens": 546069341.0, "step": 2861 }, { "epoch": 0.9769585253456221, "grad_norm": 0.22982125184594945, "learning_rate": 3.746838644410723e-05, "loss": 0.3476, "num_tokens": 546243887.0, "step": 2862 }, { "epoch": 0.977299880525687, "grad_norm": 0.22128205524341307, "learning_rate": 3.746206373292868e-05, "loss": 0.3402, "num_tokens": 546425709.0, "step": 2863 }, { "epoch": 0.9776412357057518, "grad_norm": 0.23140925754456013, "learning_rate": 3.745574102175013e-05, "loss": 0.3624, "num_tokens": 546601205.0, "step": 2864 }, { "epoch": 0.9779825908858167, "grad_norm": 0.20829154279341056, "learning_rate": 3.744941831057158e-05, "loss": 0.3838, "num_tokens": 546838724.0, "step": 2865 }, { "epoch": 0.9783239460658816, "grad_norm": 0.22249056313690985, "learning_rate": 3.744309559939302e-05, "loss": 0.3803, "num_tokens": 547031710.0, "step": 2866 }, { "epoch": 0.9786653012459464, "grad_norm": 0.20441805695468165, "learning_rate": 3.7436772888214467e-05, "loss": 0.3594, "num_tokens": 547263152.0, "step": 2867 }, { "epoch": 0.9790066564260113, "grad_norm": 0.24458184408204967, "learning_rate": 3.7430450177035914e-05, "loss": 0.3824, "num_tokens": 547450712.0, "step": 2868 }, { "epoch": 0.9793480116060761, "grad_norm": 0.25196547303376965, "learning_rate": 3.742412746585736e-05, "loss": 0.3832, "num_tokens": 547613282.0, "step": 2869 }, { "epoch": 0.9796893667861409, "grad_norm": 0.23840948747102556, "learning_rate": 3.741780475467881e-05, "loss": 0.3817, "num_tokens": 547795667.0, "step": 2870 }, { "epoch": 0.9800307219662059, "grad_norm": 0.21684928007939594, "learning_rate": 3.7411482043500254e-05, "loss": 0.3418, "num_tokens": 547977115.0, "step": 2871 }, { "epoch": 0.9803720771462707, "grad_norm": 0.24509923861154645, "learning_rate": 3.74051593323217e-05, "loss": 0.3958, "num_tokens": 548169877.0, "step": 2872 }, { "epoch": 0.9807134323263356, "grad_norm": 0.23182484865033032, "learning_rate": 3.739883662114315e-05, "loss": 0.3808, "num_tokens": 548363744.0, "step": 2873 }, { "epoch": 0.9810547875064004, "grad_norm": 0.22937103409684265, "learning_rate": 3.7392513909964595e-05, "loss": 0.3445, "num_tokens": 548516332.0, "step": 2874 }, { "epoch": 0.9813961426864652, "grad_norm": 0.24269892557433012, "learning_rate": 3.738619119878604e-05, "loss": 0.4142, "num_tokens": 548722694.0, "step": 2875 }, { "epoch": 0.9817374978665301, "grad_norm": 0.22942508511305112, "learning_rate": 3.737986848760749e-05, "loss": 0.3374, "num_tokens": 548888543.0, "step": 2876 }, { "epoch": 0.982078853046595, "grad_norm": 0.22632151438317022, "learning_rate": 3.7373545776428936e-05, "loss": 0.3891, "num_tokens": 549093223.0, "step": 2877 }, { "epoch": 0.9824202082266599, "grad_norm": 0.19737051474251407, "learning_rate": 3.7367223065250377e-05, "loss": 0.32, "num_tokens": 549299786.0, "step": 2878 }, { "epoch": 0.9827615634067247, "grad_norm": 0.2457454383657706, "learning_rate": 3.7360900354071824e-05, "loss": 0.3819, "num_tokens": 549489228.0, "step": 2879 }, { "epoch": 0.9831029185867896, "grad_norm": 0.22088285461376725, "learning_rate": 3.735457764289327e-05, "loss": 0.3619, "num_tokens": 549681485.0, "step": 2880 }, { "epoch": 0.9834442737668544, "grad_norm": 0.22515414476895002, "learning_rate": 3.7348254931714724e-05, "loss": 0.3576, "num_tokens": 549842545.0, "step": 2881 }, { "epoch": 0.9837856289469192, "grad_norm": 0.2408505414436993, "learning_rate": 3.734193222053617e-05, "loss": 0.3711, "num_tokens": 550031240.0, "step": 2882 }, { "epoch": 0.9841269841269841, "grad_norm": 0.2340232645940762, "learning_rate": 3.733560950935762e-05, "loss": 0.3704, "num_tokens": 550205792.0, "step": 2883 }, { "epoch": 0.984468339307049, "grad_norm": 0.22480462265604528, "learning_rate": 3.732928679817906e-05, "loss": 0.3321, "num_tokens": 550379213.0, "step": 2884 }, { "epoch": 0.9848096944871139, "grad_norm": 0.22432102211877758, "learning_rate": 3.7322964087000505e-05, "loss": 0.3757, "num_tokens": 550587879.0, "step": 2885 }, { "epoch": 0.9851510496671787, "grad_norm": 0.2194750271657409, "learning_rate": 3.731664137582195e-05, "loss": 0.3645, "num_tokens": 550786308.0, "step": 2886 }, { "epoch": 0.9854924048472435, "grad_norm": 0.21263398786820098, "learning_rate": 3.73103186646434e-05, "loss": 0.36, "num_tokens": 550989037.0, "step": 2887 }, { "epoch": 0.9858337600273084, "grad_norm": 0.22441105481315354, "learning_rate": 3.7303995953464846e-05, "loss": 0.4191, "num_tokens": 551246387.0, "step": 2888 }, { "epoch": 0.9861751152073732, "grad_norm": 0.22884237811687302, "learning_rate": 3.7297673242286293e-05, "loss": 0.3825, "num_tokens": 551422452.0, "step": 2889 }, { "epoch": 0.9865164703874382, "grad_norm": 0.26141723634463876, "learning_rate": 3.729135053110774e-05, "loss": 0.3693, "num_tokens": 551604784.0, "step": 2890 }, { "epoch": 0.986857825567503, "grad_norm": 0.22887568568444927, "learning_rate": 3.728502781992919e-05, "loss": 0.3603, "num_tokens": 551787749.0, "step": 2891 }, { "epoch": 0.9871991807475679, "grad_norm": 0.20607476064073826, "learning_rate": 3.7278705108750634e-05, "loss": 0.3296, "num_tokens": 551970621.0, "step": 2892 }, { "epoch": 0.9875405359276327, "grad_norm": 0.20741346691564674, "learning_rate": 3.727238239757208e-05, "loss": 0.3506, "num_tokens": 552175024.0, "step": 2893 }, { "epoch": 0.9878818911076975, "grad_norm": 0.22091895645303297, "learning_rate": 3.726605968639353e-05, "loss": 0.3351, "num_tokens": 552341340.0, "step": 2894 }, { "epoch": 0.9882232462877624, "grad_norm": 0.23322063148121952, "learning_rate": 3.7259736975214975e-05, "loss": 0.3941, "num_tokens": 552585022.0, "step": 2895 }, { "epoch": 0.9885646014678273, "grad_norm": 0.23292927220459073, "learning_rate": 3.7253414264036416e-05, "loss": 0.3359, "num_tokens": 552737597.0, "step": 2896 }, { "epoch": 0.9889059566478922, "grad_norm": 0.21007238139987958, "learning_rate": 3.724709155285786e-05, "loss": 0.3651, "num_tokens": 552954641.0, "step": 2897 }, { "epoch": 0.989247311827957, "grad_norm": 0.22619489195219042, "learning_rate": 3.7240768841679316e-05, "loss": 0.3681, "num_tokens": 553151927.0, "step": 2898 }, { "epoch": 0.9895886670080218, "grad_norm": 0.20702228470863565, "learning_rate": 3.723444613050076e-05, "loss": 0.3422, "num_tokens": 553367524.0, "step": 2899 }, { "epoch": 0.9899300221880867, "grad_norm": 0.221897928273781, "learning_rate": 3.722812341932221e-05, "loss": 0.345, "num_tokens": 553523999.0, "step": 2900 }, { "epoch": 0.9902713773681515, "grad_norm": 0.22795847710542289, "learning_rate": 3.722180070814366e-05, "loss": 0.377, "num_tokens": 553712122.0, "step": 2901 }, { "epoch": 0.9906127325482165, "grad_norm": 0.23578492363522552, "learning_rate": 3.72154779969651e-05, "loss": 0.3663, "num_tokens": 553892257.0, "step": 2902 }, { "epoch": 0.9909540877282813, "grad_norm": 0.2104389187509755, "learning_rate": 3.7209155285786544e-05, "loss": 0.3206, "num_tokens": 554061028.0, "step": 2903 }, { "epoch": 0.9912954429083461, "grad_norm": 0.22379526708186157, "learning_rate": 3.720283257460799e-05, "loss": 0.3753, "num_tokens": 554242834.0, "step": 2904 }, { "epoch": 0.991636798088411, "grad_norm": 0.22086083408909954, "learning_rate": 3.719650986342944e-05, "loss": 0.3717, "num_tokens": 554424369.0, "step": 2905 }, { "epoch": 0.9919781532684758, "grad_norm": 0.20083263504980972, "learning_rate": 3.7190187152250885e-05, "loss": 0.3576, "num_tokens": 554640677.0, "step": 2906 }, { "epoch": 0.9923195084485407, "grad_norm": 0.23136084999565326, "learning_rate": 3.718386444107234e-05, "loss": 0.3698, "num_tokens": 554798500.0, "step": 2907 }, { "epoch": 0.9926608636286056, "grad_norm": 0.2265688055447823, "learning_rate": 3.717754172989378e-05, "loss": 0.3943, "num_tokens": 554982445.0, "step": 2908 }, { "epoch": 0.9930022188086705, "grad_norm": 0.23219576870973413, "learning_rate": 3.7171219018715226e-05, "loss": 0.4163, "num_tokens": 555157876.0, "step": 2909 }, { "epoch": 0.9933435739887353, "grad_norm": 0.2250277720385747, "learning_rate": 3.716489630753667e-05, "loss": 0.3663, "num_tokens": 555350659.0, "step": 2910 }, { "epoch": 0.9936849291688001, "grad_norm": 0.22571287777484925, "learning_rate": 3.715857359635812e-05, "loss": 0.3967, "num_tokens": 555547221.0, "step": 2911 }, { "epoch": 0.994026284348865, "grad_norm": 0.21037437455383184, "learning_rate": 3.715225088517957e-05, "loss": 0.3854, "num_tokens": 555746379.0, "step": 2912 }, { "epoch": 0.9943676395289298, "grad_norm": 0.22760130141499038, "learning_rate": 3.7145928174001014e-05, "loss": 0.3945, "num_tokens": 555932576.0, "step": 2913 }, { "epoch": 0.9947089947089947, "grad_norm": 0.22543066229878644, "learning_rate": 3.7139605462822454e-05, "loss": 0.3657, "num_tokens": 556102214.0, "step": 2914 }, { "epoch": 0.9950503498890596, "grad_norm": 0.23522219953978557, "learning_rate": 3.713328275164391e-05, "loss": 0.3411, "num_tokens": 556275674.0, "step": 2915 }, { "epoch": 0.9953917050691244, "grad_norm": 0.20496010107097912, "learning_rate": 3.7126960040465355e-05, "loss": 0.3439, "num_tokens": 556483371.0, "step": 2916 }, { "epoch": 0.9957330602491893, "grad_norm": 0.2230575582983119, "learning_rate": 3.71206373292868e-05, "loss": 0.3729, "num_tokens": 556707162.0, "step": 2917 }, { "epoch": 0.9960744154292541, "grad_norm": 0.2156562638802919, "learning_rate": 3.711431461810825e-05, "loss": 0.3916, "num_tokens": 556925172.0, "step": 2918 }, { "epoch": 0.996415770609319, "grad_norm": 0.21898784248749806, "learning_rate": 3.7107991906929696e-05, "loss": 0.3401, "num_tokens": 557113515.0, "step": 2919 }, { "epoch": 0.9967571257893838, "grad_norm": 0.21716766632805318, "learning_rate": 3.7101669195751136e-05, "loss": 0.3619, "num_tokens": 557292144.0, "step": 2920 }, { "epoch": 0.9970984809694488, "grad_norm": 0.2290820305287224, "learning_rate": 3.709534648457258e-05, "loss": 0.3841, "num_tokens": 557474675.0, "step": 2921 }, { "epoch": 0.9974398361495136, "grad_norm": 0.21200149442707827, "learning_rate": 3.708902377339403e-05, "loss": 0.3577, "num_tokens": 557646218.0, "step": 2922 }, { "epoch": 0.9977811913295784, "grad_norm": 0.2253915354687168, "learning_rate": 3.708270106221548e-05, "loss": 0.3939, "num_tokens": 557846240.0, "step": 2923 }, { "epoch": 0.9981225465096433, "grad_norm": 0.20752418179633944, "learning_rate": 3.707637835103693e-05, "loss": 0.3539, "num_tokens": 558030596.0, "step": 2924 }, { "epoch": 0.9984639016897081, "grad_norm": 0.21556096315175105, "learning_rate": 3.707005563985838e-05, "loss": 0.3398, "num_tokens": 558193454.0, "step": 2925 }, { "epoch": 0.998805256869773, "grad_norm": 0.2064854979056239, "learning_rate": 3.706373292867982e-05, "loss": 0.3628, "num_tokens": 558398966.0, "step": 2926 }, { "epoch": 0.9991466120498379, "grad_norm": 0.20646064728533847, "learning_rate": 3.7057410217501265e-05, "loss": 0.3612, "num_tokens": 558622346.0, "step": 2927 }, { "epoch": 0.9994879672299027, "grad_norm": 0.24195191173511962, "learning_rate": 3.705108750632271e-05, "loss": 0.3897, "num_tokens": 558791935.0, "step": 2928 }, { "epoch": 0.9998293224099676, "grad_norm": 0.20737725498641282, "learning_rate": 3.704476479514416e-05, "loss": 0.3908, "num_tokens": 559012091.0, "step": 2929 }, { "epoch": 1.0, "grad_norm": 0.20737725498641282, "learning_rate": 3.7038442083965606e-05, "loss": 0.3389, "num_tokens": 559068811.0, "step": 2930 }, { "epoch": 1.0003413551800648, "grad_norm": 0.34006400083274535, "learning_rate": 3.703211937278705e-05, "loss": 0.3058, "num_tokens": 559249694.0, "step": 2931 }, { "epoch": 1.0006827103601297, "grad_norm": 0.28159079598902953, "learning_rate": 3.70257966616085e-05, "loss": 0.3066, "num_tokens": 559438886.0, "step": 2932 }, { "epoch": 1.0010240655401945, "grad_norm": 0.18589042827133836, "learning_rate": 3.701947395042995e-05, "loss": 0.3062, "num_tokens": 559634416.0, "step": 2933 }, { "epoch": 1.0013654207202594, "grad_norm": 0.24194753064359603, "learning_rate": 3.7013151239251394e-05, "loss": 0.3022, "num_tokens": 559825103.0, "step": 2934 }, { "epoch": 1.0017067759003242, "grad_norm": 0.26154668324582814, "learning_rate": 3.700682852807284e-05, "loss": 0.3049, "num_tokens": 560034927.0, "step": 2935 }, { "epoch": 1.002048131080389, "grad_norm": 0.225723501106411, "learning_rate": 3.700050581689429e-05, "loss": 0.2964, "num_tokens": 560201644.0, "step": 2936 }, { "epoch": 1.002389486260454, "grad_norm": 0.3073109002387249, "learning_rate": 3.6994183105715735e-05, "loss": 0.324, "num_tokens": 560407029.0, "step": 2937 }, { "epoch": 1.002730841440519, "grad_norm": 0.32312629563019746, "learning_rate": 3.6987860394537175e-05, "loss": 0.3387, "num_tokens": 560596993.0, "step": 2938 }, { "epoch": 1.0030721966205838, "grad_norm": 0.21265483649530095, "learning_rate": 3.698153768335862e-05, "loss": 0.2919, "num_tokens": 560764566.0, "step": 2939 }, { "epoch": 1.0034135518006486, "grad_norm": 0.28109343583949165, "learning_rate": 3.697521497218007e-05, "loss": 0.3073, "num_tokens": 560942030.0, "step": 2940 }, { "epoch": 1.0037549069807135, "grad_norm": 0.283512414019452, "learning_rate": 3.696889226100152e-05, "loss": 0.3308, "num_tokens": 561112673.0, "step": 2941 }, { "epoch": 1.0040962621607783, "grad_norm": 0.2023951441846279, "learning_rate": 3.696256954982297e-05, "loss": 0.3071, "num_tokens": 561284011.0, "step": 2942 }, { "epoch": 1.0044376173408431, "grad_norm": 0.27426371718740356, "learning_rate": 3.695624683864442e-05, "loss": 0.2927, "num_tokens": 561461218.0, "step": 2943 }, { "epoch": 1.004778972520908, "grad_norm": 0.24884621197152218, "learning_rate": 3.694992412746586e-05, "loss": 0.3321, "num_tokens": 561658610.0, "step": 2944 }, { "epoch": 1.0051203277009728, "grad_norm": 0.24497349994208367, "learning_rate": 3.6943601416287304e-05, "loss": 0.3293, "num_tokens": 561868968.0, "step": 2945 }, { "epoch": 1.0054616828810377, "grad_norm": 0.26078648025309326, "learning_rate": 3.693727870510875e-05, "loss": 0.2941, "num_tokens": 562061770.0, "step": 2946 }, { "epoch": 1.0058030380611025, "grad_norm": 0.33383106165925264, "learning_rate": 3.69309559939302e-05, "loss": 0.3017, "num_tokens": 562236276.0, "step": 2947 }, { "epoch": 1.0061443932411673, "grad_norm": 0.22966521861118758, "learning_rate": 3.6924633282751645e-05, "loss": 0.3023, "num_tokens": 562444043.0, "step": 2948 }, { "epoch": 1.0064857484212324, "grad_norm": 0.21880628791962933, "learning_rate": 3.691831057157309e-05, "loss": 0.2738, "num_tokens": 562605228.0, "step": 2949 }, { "epoch": 1.0068271036012972, "grad_norm": 0.307287778569475, "learning_rate": 3.691198786039454e-05, "loss": 0.3357, "num_tokens": 562805087.0, "step": 2950 }, { "epoch": 1.007168458781362, "grad_norm": 0.22568159481028172, "learning_rate": 3.6905665149215986e-05, "loss": 0.2783, "num_tokens": 562964060.0, "step": 2951 }, { "epoch": 1.007509813961427, "grad_norm": 0.21926147954819347, "learning_rate": 3.689934243803743e-05, "loss": 0.2987, "num_tokens": 563192686.0, "step": 2952 }, { "epoch": 1.0078511691414918, "grad_norm": 0.22283617054572094, "learning_rate": 3.689301972685888e-05, "loss": 0.3221, "num_tokens": 563370393.0, "step": 2953 }, { "epoch": 1.0081925243215566, "grad_norm": 0.2186721920069707, "learning_rate": 3.688669701568033e-05, "loss": 0.2976, "num_tokens": 563535112.0, "step": 2954 }, { "epoch": 1.0085338795016214, "grad_norm": 0.2557087255691507, "learning_rate": 3.6880374304501774e-05, "loss": 0.3081, "num_tokens": 563738945.0, "step": 2955 }, { "epoch": 1.0088752346816863, "grad_norm": 0.19897728665041453, "learning_rate": 3.6874051593323214e-05, "loss": 0.3188, "num_tokens": 563940917.0, "step": 2956 }, { "epoch": 1.0092165898617511, "grad_norm": 0.2291283474492121, "learning_rate": 3.686772888214466e-05, "loss": 0.3081, "num_tokens": 564135934.0, "step": 2957 }, { "epoch": 1.009557945041816, "grad_norm": 0.22678553730249645, "learning_rate": 3.6861406170966115e-05, "loss": 0.3329, "num_tokens": 564313573.0, "step": 2958 }, { "epoch": 1.0098993002218808, "grad_norm": 0.25575697386887, "learning_rate": 3.685508345978756e-05, "loss": 0.3354, "num_tokens": 564527542.0, "step": 2959 }, { "epoch": 1.0102406554019456, "grad_norm": 0.2337349806152036, "learning_rate": 3.684876074860901e-05, "loss": 0.2967, "num_tokens": 564715187.0, "step": 2960 }, { "epoch": 1.0105820105820107, "grad_norm": 0.23707705547123817, "learning_rate": 3.6842438037430456e-05, "loss": 0.3062, "num_tokens": 564890920.0, "step": 2961 }, { "epoch": 1.0109233657620755, "grad_norm": 0.21332508226596777, "learning_rate": 3.6836115326251896e-05, "loss": 0.3168, "num_tokens": 565092038.0, "step": 2962 }, { "epoch": 1.0112647209421404, "grad_norm": 0.2571122538235865, "learning_rate": 3.682979261507334e-05, "loss": 0.3065, "num_tokens": 565328995.0, "step": 2963 }, { "epoch": 1.0116060761222052, "grad_norm": 0.22287110354247402, "learning_rate": 3.682346990389479e-05, "loss": 0.2866, "num_tokens": 565515735.0, "step": 2964 }, { "epoch": 1.01194743130227, "grad_norm": 0.20356100249890557, "learning_rate": 3.681714719271624e-05, "loss": 0.2822, "num_tokens": 565718342.0, "step": 2965 }, { "epoch": 1.012288786482335, "grad_norm": 0.23842814183978864, "learning_rate": 3.6810824481537684e-05, "loss": 0.2958, "num_tokens": 565932750.0, "step": 2966 }, { "epoch": 1.0126301416623997, "grad_norm": 0.24400945991541043, "learning_rate": 3.680450177035914e-05, "loss": 0.3172, "num_tokens": 566113605.0, "step": 2967 }, { "epoch": 1.0129714968424646, "grad_norm": 0.23793789993046313, "learning_rate": 3.679817905918058e-05, "loss": 0.3337, "num_tokens": 566296492.0, "step": 2968 }, { "epoch": 1.0133128520225294, "grad_norm": 0.2366787775693436, "learning_rate": 3.6791856348002025e-05, "loss": 0.308, "num_tokens": 566486799.0, "step": 2969 }, { "epoch": 1.0136542072025942, "grad_norm": 0.2624252153041104, "learning_rate": 3.678553363682347e-05, "loss": 0.3557, "num_tokens": 566691998.0, "step": 2970 }, { "epoch": 1.013995562382659, "grad_norm": 0.22045380822932412, "learning_rate": 3.677921092564492e-05, "loss": 0.309, "num_tokens": 566902638.0, "step": 2971 }, { "epoch": 1.014336917562724, "grad_norm": 0.2330514785471858, "learning_rate": 3.6772888214466366e-05, "loss": 0.3189, "num_tokens": 567071168.0, "step": 2972 }, { "epoch": 1.0146782727427888, "grad_norm": 0.2532165119953474, "learning_rate": 3.676656550328781e-05, "loss": 0.2885, "num_tokens": 567250866.0, "step": 2973 }, { "epoch": 1.0150196279228538, "grad_norm": 0.2165382384213945, "learning_rate": 3.676024279210925e-05, "loss": 0.3426, "num_tokens": 567436486.0, "step": 2974 }, { "epoch": 1.0153609831029187, "grad_norm": 0.23618503346122696, "learning_rate": 3.675392008093071e-05, "loss": 0.328, "num_tokens": 567631652.0, "step": 2975 }, { "epoch": 1.0157023382829835, "grad_norm": 0.2793927910059117, "learning_rate": 3.6747597369752154e-05, "loss": 0.288, "num_tokens": 567808933.0, "step": 2976 }, { "epoch": 1.0160436934630483, "grad_norm": 0.25524275508550287, "learning_rate": 3.67412746585736e-05, "loss": 0.2839, "num_tokens": 567957738.0, "step": 2977 }, { "epoch": 1.0163850486431132, "grad_norm": 0.2977578575949746, "learning_rate": 3.673495194739505e-05, "loss": 0.2913, "num_tokens": 568148550.0, "step": 2978 }, { "epoch": 1.016726403823178, "grad_norm": 0.2980670510815921, "learning_rate": 3.6728629236216495e-05, "loss": 0.3054, "num_tokens": 568362702.0, "step": 2979 }, { "epoch": 1.0170677590032429, "grad_norm": 0.21080663837458044, "learning_rate": 3.6722306525037935e-05, "loss": 0.2855, "num_tokens": 568581883.0, "step": 2980 }, { "epoch": 1.0174091141833077, "grad_norm": 0.2129555315410394, "learning_rate": 3.671598381385938e-05, "loss": 0.3104, "num_tokens": 568799468.0, "step": 2981 }, { "epoch": 1.0177504693633725, "grad_norm": 0.2508226683971591, "learning_rate": 3.670966110268083e-05, "loss": 0.3019, "num_tokens": 568976312.0, "step": 2982 }, { "epoch": 1.0180918245434374, "grad_norm": 0.2438940784779583, "learning_rate": 3.6703338391502276e-05, "loss": 0.3069, "num_tokens": 569129102.0, "step": 2983 }, { "epoch": 1.0184331797235022, "grad_norm": 0.2447571881287923, "learning_rate": 3.669701568032373e-05, "loss": 0.3022, "num_tokens": 569311114.0, "step": 2984 }, { "epoch": 1.018774534903567, "grad_norm": 0.2515299907194023, "learning_rate": 3.669069296914518e-05, "loss": 0.3041, "num_tokens": 569486319.0, "step": 2985 }, { "epoch": 1.0191158900836321, "grad_norm": 0.21881278583801178, "learning_rate": 3.668437025796662e-05, "loss": 0.3119, "num_tokens": 569742893.0, "step": 2986 }, { "epoch": 1.019457245263697, "grad_norm": 0.2332897765193224, "learning_rate": 3.6678047546788064e-05, "loss": 0.3132, "num_tokens": 569971155.0, "step": 2987 }, { "epoch": 1.0197986004437618, "grad_norm": 0.25524298303731946, "learning_rate": 3.667172483560951e-05, "loss": 0.2984, "num_tokens": 570136913.0, "step": 2988 }, { "epoch": 1.0201399556238266, "grad_norm": 0.2361083252295952, "learning_rate": 3.666540212443096e-05, "loss": 0.3209, "num_tokens": 570324167.0, "step": 2989 }, { "epoch": 1.0204813108038915, "grad_norm": 0.23360133570594988, "learning_rate": 3.6659079413252405e-05, "loss": 0.3072, "num_tokens": 570549205.0, "step": 2990 }, { "epoch": 1.0208226659839563, "grad_norm": 0.21093010267980403, "learning_rate": 3.6652756702073845e-05, "loss": 0.3209, "num_tokens": 570774133.0, "step": 2991 }, { "epoch": 1.0211640211640212, "grad_norm": 0.23521266435344296, "learning_rate": 3.66464339908953e-05, "loss": 0.2979, "num_tokens": 570941419.0, "step": 2992 }, { "epoch": 1.021505376344086, "grad_norm": 0.24195263351322022, "learning_rate": 3.6640111279716746e-05, "loss": 0.3234, "num_tokens": 571155681.0, "step": 2993 }, { "epoch": 1.0218467315241508, "grad_norm": 0.2352462884314632, "learning_rate": 3.663378856853819e-05, "loss": 0.3246, "num_tokens": 571340384.0, "step": 2994 }, { "epoch": 1.0221880867042157, "grad_norm": 0.24090996981124685, "learning_rate": 3.662746585735964e-05, "loss": 0.2807, "num_tokens": 571500114.0, "step": 2995 }, { "epoch": 1.0225294418842805, "grad_norm": 0.2271036231376271, "learning_rate": 3.662114314618109e-05, "loss": 0.3285, "num_tokens": 571696189.0, "step": 2996 }, { "epoch": 1.0228707970643454, "grad_norm": 0.2529771311180978, "learning_rate": 3.661482043500253e-05, "loss": 0.3318, "num_tokens": 571894826.0, "step": 2997 }, { "epoch": 1.0232121522444102, "grad_norm": 0.276021266830477, "learning_rate": 3.6608497723823974e-05, "loss": 0.312, "num_tokens": 572125742.0, "step": 2998 }, { "epoch": 1.0235535074244753, "grad_norm": 0.22213691301774216, "learning_rate": 3.660217501264542e-05, "loss": 0.326, "num_tokens": 572320819.0, "step": 2999 }, { "epoch": 1.02389486260454, "grad_norm": 0.21898574554531172, "learning_rate": 3.659585230146687e-05, "loss": 0.3102, "num_tokens": 572521132.0, "step": 3000 }, { "epoch": 1.024236217784605, "grad_norm": 0.2539438835541723, "learning_rate": 3.658952959028832e-05, "loss": 0.2719, "num_tokens": 572690846.0, "step": 3001 }, { "epoch": 1.0245775729646698, "grad_norm": 0.18655639980182945, "learning_rate": 3.658320687910977e-05, "loss": 0.3056, "num_tokens": 572893926.0, "step": 3002 }, { "epoch": 1.0249189281447346, "grad_norm": 0.2339045410469957, "learning_rate": 3.657688416793121e-05, "loss": 0.3022, "num_tokens": 573042343.0, "step": 3003 }, { "epoch": 1.0252602833247995, "grad_norm": 0.288645766004621, "learning_rate": 3.6570561456752656e-05, "loss": 0.3177, "num_tokens": 573238969.0, "step": 3004 }, { "epoch": 1.0256016385048643, "grad_norm": 0.19382592327748754, "learning_rate": 3.65642387455741e-05, "loss": 0.2796, "num_tokens": 573412232.0, "step": 3005 }, { "epoch": 1.0259429936849291, "grad_norm": 0.22891887684771292, "learning_rate": 3.655791603439555e-05, "loss": 0.2868, "num_tokens": 573588542.0, "step": 3006 }, { "epoch": 1.026284348864994, "grad_norm": 0.28011357914623786, "learning_rate": 3.6551593323217e-05, "loss": 0.3076, "num_tokens": 573781723.0, "step": 3007 }, { "epoch": 1.0266257040450588, "grad_norm": 0.267201341758226, "learning_rate": 3.6545270612038444e-05, "loss": 0.292, "num_tokens": 573965334.0, "step": 3008 }, { "epoch": 1.0269670592251237, "grad_norm": 0.22832225983732568, "learning_rate": 3.653894790085989e-05, "loss": 0.3284, "num_tokens": 574144135.0, "step": 3009 }, { "epoch": 1.0273084144051885, "grad_norm": 0.34280560802881765, "learning_rate": 3.653262518968134e-05, "loss": 0.3114, "num_tokens": 574319391.0, "step": 3010 }, { "epoch": 1.0276497695852536, "grad_norm": 0.21228666861798037, "learning_rate": 3.6526302478502785e-05, "loss": 0.3205, "num_tokens": 574531493.0, "step": 3011 }, { "epoch": 1.0279911247653184, "grad_norm": 0.23732629718660017, "learning_rate": 3.651997976732423e-05, "loss": 0.325, "num_tokens": 574708827.0, "step": 3012 }, { "epoch": 1.0283324799453832, "grad_norm": 0.24671708598869246, "learning_rate": 3.651365705614568e-05, "loss": 0.3026, "num_tokens": 574912084.0, "step": 3013 }, { "epoch": 1.028673835125448, "grad_norm": 0.26683526921605283, "learning_rate": 3.6507334344967126e-05, "loss": 0.3388, "num_tokens": 575122776.0, "step": 3014 }, { "epoch": 1.029015190305513, "grad_norm": 0.23326663819602855, "learning_rate": 3.6501011633788566e-05, "loss": 0.3186, "num_tokens": 575352198.0, "step": 3015 }, { "epoch": 1.0293565454855778, "grad_norm": 0.21603387629754084, "learning_rate": 3.649468892261001e-05, "loss": 0.266, "num_tokens": 575549375.0, "step": 3016 }, { "epoch": 1.0296979006656426, "grad_norm": 0.21807555094874032, "learning_rate": 3.648836621143146e-05, "loss": 0.3049, "num_tokens": 575780543.0, "step": 3017 }, { "epoch": 1.0300392558457074, "grad_norm": 0.22676493702431935, "learning_rate": 3.6482043500252914e-05, "loss": 0.2997, "num_tokens": 575943302.0, "step": 3018 }, { "epoch": 1.0303806110257723, "grad_norm": 0.22848544021732417, "learning_rate": 3.647572078907436e-05, "loss": 0.3094, "num_tokens": 576144061.0, "step": 3019 }, { "epoch": 1.0307219662058371, "grad_norm": 0.2531134975512424, "learning_rate": 3.646939807789581e-05, "loss": 0.2939, "num_tokens": 576333146.0, "step": 3020 }, { "epoch": 1.031063321385902, "grad_norm": 0.23430449388313565, "learning_rate": 3.646307536671725e-05, "loss": 0.3049, "num_tokens": 576550308.0, "step": 3021 }, { "epoch": 1.0314046765659668, "grad_norm": 0.3278180854814666, "learning_rate": 3.6456752655538695e-05, "loss": 0.3324, "num_tokens": 576738845.0, "step": 3022 }, { "epoch": 1.0317460317460316, "grad_norm": 0.18105996231819338, "learning_rate": 3.645042994436014e-05, "loss": 0.3269, "num_tokens": 576974250.0, "step": 3023 }, { "epoch": 1.0320873869260967, "grad_norm": 0.2596568150146823, "learning_rate": 3.644410723318159e-05, "loss": 0.3051, "num_tokens": 577135163.0, "step": 3024 }, { "epoch": 1.0324287421061615, "grad_norm": 0.22335783118671357, "learning_rate": 3.6437784522003036e-05, "loss": 0.2997, "num_tokens": 577336017.0, "step": 3025 }, { "epoch": 1.0327700972862264, "grad_norm": 0.26241795838093107, "learning_rate": 3.643146181082448e-05, "loss": 0.2905, "num_tokens": 577533792.0, "step": 3026 }, { "epoch": 1.0331114524662912, "grad_norm": 0.19458259044259077, "learning_rate": 3.642513909964593e-05, "loss": 0.3069, "num_tokens": 577690907.0, "step": 3027 }, { "epoch": 1.033452807646356, "grad_norm": 0.2776950978857116, "learning_rate": 3.6418816388467377e-05, "loss": 0.3344, "num_tokens": 577904071.0, "step": 3028 }, { "epoch": 1.033794162826421, "grad_norm": 0.2468525560893364, "learning_rate": 3.6412493677288824e-05, "loss": 0.3276, "num_tokens": 578055161.0, "step": 3029 }, { "epoch": 1.0341355180064857, "grad_norm": 0.2781265685132248, "learning_rate": 3.640617096611027e-05, "loss": 0.3048, "num_tokens": 578288805.0, "step": 3030 }, { "epoch": 1.0344768731865506, "grad_norm": 0.18794283179963478, "learning_rate": 3.639984825493172e-05, "loss": 0.3272, "num_tokens": 578512846.0, "step": 3031 }, { "epoch": 1.0348182283666154, "grad_norm": 0.2550362102555204, "learning_rate": 3.6393525543753165e-05, "loss": 0.2819, "num_tokens": 578760403.0, "step": 3032 }, { "epoch": 1.0351595835466803, "grad_norm": 0.21160449529391062, "learning_rate": 3.6387202832574605e-05, "loss": 0.3261, "num_tokens": 578969318.0, "step": 3033 }, { "epoch": 1.035500938726745, "grad_norm": 0.22174366781612262, "learning_rate": 3.638088012139605e-05, "loss": 0.2867, "num_tokens": 579164689.0, "step": 3034 }, { "epoch": 1.03584229390681, "grad_norm": 0.22536196303632383, "learning_rate": 3.6374557410217505e-05, "loss": 0.3111, "num_tokens": 579361010.0, "step": 3035 }, { "epoch": 1.036183649086875, "grad_norm": 0.24828384789316493, "learning_rate": 3.636823469903895e-05, "loss": 0.311, "num_tokens": 579540096.0, "step": 3036 }, { "epoch": 1.0365250042669398, "grad_norm": 0.24094546928885643, "learning_rate": 3.63619119878604e-05, "loss": 0.3097, "num_tokens": 579741699.0, "step": 3037 }, { "epoch": 1.0368663594470047, "grad_norm": 0.21859217586700347, "learning_rate": 3.6355589276681846e-05, "loss": 0.2927, "num_tokens": 579919465.0, "step": 3038 }, { "epoch": 1.0372077146270695, "grad_norm": 0.25711998523722635, "learning_rate": 3.634926656550329e-05, "loss": 0.3075, "num_tokens": 580096225.0, "step": 3039 }, { "epoch": 1.0375490698071343, "grad_norm": 0.22982773776165263, "learning_rate": 3.6342943854324734e-05, "loss": 0.3203, "num_tokens": 580302653.0, "step": 3040 }, { "epoch": 1.0378904249871992, "grad_norm": 0.21727251888053825, "learning_rate": 3.633662114314618e-05, "loss": 0.3131, "num_tokens": 580456607.0, "step": 3041 }, { "epoch": 1.038231780167264, "grad_norm": 0.2580828806911085, "learning_rate": 3.633029843196763e-05, "loss": 0.3377, "num_tokens": 580652562.0, "step": 3042 }, { "epoch": 1.0385731353473289, "grad_norm": 0.2796114959226588, "learning_rate": 3.6323975720789075e-05, "loss": 0.332, "num_tokens": 580830390.0, "step": 3043 }, { "epoch": 1.0389144905273937, "grad_norm": 0.2307047262339014, "learning_rate": 3.631765300961053e-05, "loss": 0.3079, "num_tokens": 581017760.0, "step": 3044 }, { "epoch": 1.0392558457074585, "grad_norm": 0.24077265641786205, "learning_rate": 3.631133029843197e-05, "loss": 0.3345, "num_tokens": 581238042.0, "step": 3045 }, { "epoch": 1.0395972008875234, "grad_norm": 0.26835177878937067, "learning_rate": 3.6305007587253416e-05, "loss": 0.3087, "num_tokens": 581413409.0, "step": 3046 }, { "epoch": 1.0399385560675882, "grad_norm": 0.24752106019052247, "learning_rate": 3.629868487607486e-05, "loss": 0.3283, "num_tokens": 581609481.0, "step": 3047 }, { "epoch": 1.0402799112476533, "grad_norm": 0.2704093467930853, "learning_rate": 3.629236216489631e-05, "loss": 0.3123, "num_tokens": 581830177.0, "step": 3048 }, { "epoch": 1.0406212664277181, "grad_norm": 0.2227042773704346, "learning_rate": 3.6286039453717756e-05, "loss": 0.291, "num_tokens": 582013861.0, "step": 3049 }, { "epoch": 1.040962621607783, "grad_norm": 0.24233087201180076, "learning_rate": 3.6279716742539203e-05, "loss": 0.2971, "num_tokens": 582182145.0, "step": 3050 }, { "epoch": 1.0413039767878478, "grad_norm": 0.23154980431782798, "learning_rate": 3.6273394031360644e-05, "loss": 0.2811, "num_tokens": 582356245.0, "step": 3051 }, { "epoch": 1.0416453319679126, "grad_norm": 0.24674440972785733, "learning_rate": 3.62670713201821e-05, "loss": 0.3214, "num_tokens": 582536667.0, "step": 3052 }, { "epoch": 1.0419866871479775, "grad_norm": 0.2593098124068622, "learning_rate": 3.6260748609003544e-05, "loss": 0.2895, "num_tokens": 582729683.0, "step": 3053 }, { "epoch": 1.0423280423280423, "grad_norm": 0.2126772608416265, "learning_rate": 3.625442589782499e-05, "loss": 0.3009, "num_tokens": 582928843.0, "step": 3054 }, { "epoch": 1.0426693975081072, "grad_norm": 0.23435831268635954, "learning_rate": 3.624810318664644e-05, "loss": 0.299, "num_tokens": 583086387.0, "step": 3055 }, { "epoch": 1.043010752688172, "grad_norm": 0.27499520061962984, "learning_rate": 3.6241780475467885e-05, "loss": 0.3365, "num_tokens": 583289955.0, "step": 3056 }, { "epoch": 1.0433521078682368, "grad_norm": 0.2566561244844522, "learning_rate": 3.6235457764289326e-05, "loss": 0.2947, "num_tokens": 583478015.0, "step": 3057 }, { "epoch": 1.0436934630483017, "grad_norm": 0.23998850270702285, "learning_rate": 3.622913505311077e-05, "loss": 0.32, "num_tokens": 583686282.0, "step": 3058 }, { "epoch": 1.0440348182283665, "grad_norm": 0.234016086134986, "learning_rate": 3.622281234193222e-05, "loss": 0.3169, "num_tokens": 583887352.0, "step": 3059 }, { "epoch": 1.0443761734084314, "grad_norm": 0.22922252338385396, "learning_rate": 3.6216489630753667e-05, "loss": 0.3164, "num_tokens": 584084838.0, "step": 3060 }, { "epoch": 1.0447175285884964, "grad_norm": 0.22862243314107514, "learning_rate": 3.621016691957512e-05, "loss": 0.3137, "num_tokens": 584280971.0, "step": 3061 }, { "epoch": 1.0450588837685613, "grad_norm": 0.24692623406941286, "learning_rate": 3.620384420839657e-05, "loss": 0.3115, "num_tokens": 584456059.0, "step": 3062 }, { "epoch": 1.045400238948626, "grad_norm": 0.22884674948980366, "learning_rate": 3.619752149721801e-05, "loss": 0.3445, "num_tokens": 584663475.0, "step": 3063 }, { "epoch": 1.045741594128691, "grad_norm": 0.2569824955588746, "learning_rate": 3.6191198786039454e-05, "loss": 0.3371, "num_tokens": 584862705.0, "step": 3064 }, { "epoch": 1.0460829493087558, "grad_norm": 0.23329638294504546, "learning_rate": 3.61848760748609e-05, "loss": 0.3078, "num_tokens": 585061597.0, "step": 3065 }, { "epoch": 1.0464243044888206, "grad_norm": 0.24514068764479238, "learning_rate": 3.617855336368235e-05, "loss": 0.3203, "num_tokens": 585250271.0, "step": 3066 }, { "epoch": 1.0467656596688855, "grad_norm": 0.25492892329611777, "learning_rate": 3.6172230652503795e-05, "loss": 0.3075, "num_tokens": 585432970.0, "step": 3067 }, { "epoch": 1.0471070148489503, "grad_norm": 0.26093258621502735, "learning_rate": 3.616590794132524e-05, "loss": 0.2998, "num_tokens": 585604535.0, "step": 3068 }, { "epoch": 1.0474483700290151, "grad_norm": 0.22961808004715714, "learning_rate": 3.615958523014669e-05, "loss": 0.3332, "num_tokens": 585808629.0, "step": 3069 }, { "epoch": 1.04778972520908, "grad_norm": 0.2085961313667365, "learning_rate": 3.6153262518968136e-05, "loss": 0.3083, "num_tokens": 586008797.0, "step": 3070 }, { "epoch": 1.0481310803891448, "grad_norm": 0.23403777416206684, "learning_rate": 3.614693980778958e-05, "loss": 0.2921, "num_tokens": 586183519.0, "step": 3071 }, { "epoch": 1.0484724355692097, "grad_norm": 0.22196773617459314, "learning_rate": 3.614061709661103e-05, "loss": 0.2954, "num_tokens": 586365561.0, "step": 3072 }, { "epoch": 1.0488137907492747, "grad_norm": 0.23729396944144923, "learning_rate": 3.613429438543248e-05, "loss": 0.2787, "num_tokens": 586547300.0, "step": 3073 }, { "epoch": 1.0491551459293396, "grad_norm": 0.25084163772016427, "learning_rate": 3.6127971674253924e-05, "loss": 0.3187, "num_tokens": 586729921.0, "step": 3074 }, { "epoch": 1.0494965011094044, "grad_norm": 0.24658325235227796, "learning_rate": 3.6121648963075364e-05, "loss": 0.3221, "num_tokens": 586945261.0, "step": 3075 }, { "epoch": 1.0498378562894692, "grad_norm": 0.22114102162869897, "learning_rate": 3.611532625189681e-05, "loss": 0.3252, "num_tokens": 587137115.0, "step": 3076 }, { "epoch": 1.050179211469534, "grad_norm": 0.26692803058814685, "learning_rate": 3.610900354071826e-05, "loss": 0.31, "num_tokens": 587342563.0, "step": 3077 }, { "epoch": 1.050520566649599, "grad_norm": 0.2643605513989325, "learning_rate": 3.610268082953971e-05, "loss": 0.3179, "num_tokens": 587524289.0, "step": 3078 }, { "epoch": 1.0508619218296638, "grad_norm": 0.23649181769472555, "learning_rate": 3.609635811836116e-05, "loss": 0.2871, "num_tokens": 587692743.0, "step": 3079 }, { "epoch": 1.0512032770097286, "grad_norm": 0.22938081534711763, "learning_rate": 3.6090035407182606e-05, "loss": 0.2859, "num_tokens": 587853155.0, "step": 3080 }, { "epoch": 1.0515446321897934, "grad_norm": 0.2442330051091109, "learning_rate": 3.6083712696004046e-05, "loss": 0.3086, "num_tokens": 588044114.0, "step": 3081 }, { "epoch": 1.0518859873698583, "grad_norm": 0.2557322356827533, "learning_rate": 3.607738998482549e-05, "loss": 0.2929, "num_tokens": 588211655.0, "step": 3082 }, { "epoch": 1.0522273425499231, "grad_norm": 0.23775678552215665, "learning_rate": 3.607106727364694e-05, "loss": 0.2836, "num_tokens": 588416734.0, "step": 3083 }, { "epoch": 1.052568697729988, "grad_norm": 0.19524680215765336, "learning_rate": 3.606474456246839e-05, "loss": 0.3004, "num_tokens": 588609636.0, "step": 3084 }, { "epoch": 1.052910052910053, "grad_norm": 0.2532113016708949, "learning_rate": 3.6058421851289834e-05, "loss": 0.32, "num_tokens": 588795305.0, "step": 3085 }, { "epoch": 1.0532514080901179, "grad_norm": 0.22769180241498604, "learning_rate": 3.605209914011128e-05, "loss": 0.3115, "num_tokens": 588976852.0, "step": 3086 }, { "epoch": 1.0535927632701827, "grad_norm": 0.23988029548310755, "learning_rate": 3.604577642893273e-05, "loss": 0.2987, "num_tokens": 589173705.0, "step": 3087 }, { "epoch": 1.0539341184502475, "grad_norm": 0.22609611721043107, "learning_rate": 3.6039453717754175e-05, "loss": 0.3031, "num_tokens": 589351913.0, "step": 3088 }, { "epoch": 1.0542754736303124, "grad_norm": 0.21833631727093303, "learning_rate": 3.603313100657562e-05, "loss": 0.3269, "num_tokens": 589558956.0, "step": 3089 }, { "epoch": 1.0546168288103772, "grad_norm": 0.2379898406036868, "learning_rate": 3.602680829539707e-05, "loss": 0.2855, "num_tokens": 589741361.0, "step": 3090 }, { "epoch": 1.054958183990442, "grad_norm": 0.22476938971661198, "learning_rate": 3.6020485584218516e-05, "loss": 0.2976, "num_tokens": 589935279.0, "step": 3091 }, { "epoch": 1.055299539170507, "grad_norm": 0.2293035195176464, "learning_rate": 3.601416287303996e-05, "loss": 0.2891, "num_tokens": 590131965.0, "step": 3092 }, { "epoch": 1.0556408943505717, "grad_norm": 0.2127935618631566, "learning_rate": 3.6007840161861403e-05, "loss": 0.3009, "num_tokens": 590334123.0, "step": 3093 }, { "epoch": 1.0559822495306366, "grad_norm": 0.22760801506382058, "learning_rate": 3.600151745068285e-05, "loss": 0.3173, "num_tokens": 590535954.0, "step": 3094 }, { "epoch": 1.0563236047107014, "grad_norm": 0.22126163906230076, "learning_rate": 3.5995194739504304e-05, "loss": 0.3475, "num_tokens": 590755603.0, "step": 3095 }, { "epoch": 1.0566649598907663, "grad_norm": 0.23531832850816267, "learning_rate": 3.598887202832575e-05, "loss": 0.2961, "num_tokens": 590981286.0, "step": 3096 }, { "epoch": 1.057006315070831, "grad_norm": 0.2199329507602309, "learning_rate": 3.59825493171472e-05, "loss": 0.2896, "num_tokens": 591164096.0, "step": 3097 }, { "epoch": 1.0573476702508962, "grad_norm": 0.23366618762282385, "learning_rate": 3.5976226605968645e-05, "loss": 0.2911, "num_tokens": 591304555.0, "step": 3098 }, { "epoch": 1.057689025430961, "grad_norm": 0.2457557273258615, "learning_rate": 3.5969903894790085e-05, "loss": 0.2813, "num_tokens": 591458338.0, "step": 3099 }, { "epoch": 1.0580303806110258, "grad_norm": 0.21489241866541367, "learning_rate": 3.596358118361153e-05, "loss": 0.2976, "num_tokens": 591647562.0, "step": 3100 }, { "epoch": 1.0583717357910907, "grad_norm": 0.26498898419960143, "learning_rate": 3.595725847243298e-05, "loss": 0.278, "num_tokens": 591829618.0, "step": 3101 }, { "epoch": 1.0587130909711555, "grad_norm": 0.1975258968131739, "learning_rate": 3.5950935761254426e-05, "loss": 0.3065, "num_tokens": 591977440.0, "step": 3102 }, { "epoch": 1.0590544461512204, "grad_norm": 0.2525480964288246, "learning_rate": 3.594461305007587e-05, "loss": 0.3288, "num_tokens": 592184773.0, "step": 3103 }, { "epoch": 1.0593958013312852, "grad_norm": 0.22642563084544162, "learning_rate": 3.593829033889732e-05, "loss": 0.3263, "num_tokens": 592406801.0, "step": 3104 }, { "epoch": 1.05973715651135, "grad_norm": 0.24414409979942334, "learning_rate": 3.593196762771877e-05, "loss": 0.3394, "num_tokens": 592615326.0, "step": 3105 }, { "epoch": 1.0600785116914149, "grad_norm": 0.25509891792416206, "learning_rate": 3.5925644916540214e-05, "loss": 0.3383, "num_tokens": 592828613.0, "step": 3106 }, { "epoch": 1.0604198668714797, "grad_norm": 0.23209111853917921, "learning_rate": 3.591932220536166e-05, "loss": 0.3289, "num_tokens": 593057250.0, "step": 3107 }, { "epoch": 1.0607612220515446, "grad_norm": 0.29090409823098656, "learning_rate": 3.591299949418311e-05, "loss": 0.2837, "num_tokens": 593239253.0, "step": 3108 }, { "epoch": 1.0611025772316094, "grad_norm": 0.2106753305651745, "learning_rate": 3.5906676783004555e-05, "loss": 0.3056, "num_tokens": 593432313.0, "step": 3109 }, { "epoch": 1.0614439324116745, "grad_norm": 0.25135722720557374, "learning_rate": 3.5900354071826e-05, "loss": 0.3007, "num_tokens": 593641251.0, "step": 3110 }, { "epoch": 1.0617852875917393, "grad_norm": 0.22243484503895053, "learning_rate": 3.589403136064744e-05, "loss": 0.289, "num_tokens": 593840341.0, "step": 3111 }, { "epoch": 1.0621266427718041, "grad_norm": 0.20981666681012845, "learning_rate": 3.5887708649468896e-05, "loss": 0.2934, "num_tokens": 594030487.0, "step": 3112 }, { "epoch": 1.062467997951869, "grad_norm": 0.21596269956802872, "learning_rate": 3.588138593829034e-05, "loss": 0.308, "num_tokens": 594213960.0, "step": 3113 }, { "epoch": 1.0628093531319338, "grad_norm": 0.23292952354917418, "learning_rate": 3.587506322711179e-05, "loss": 0.2882, "num_tokens": 594433408.0, "step": 3114 }, { "epoch": 1.0631507083119986, "grad_norm": 0.2146655595330836, "learning_rate": 3.586874051593324e-05, "loss": 0.2955, "num_tokens": 594604365.0, "step": 3115 }, { "epoch": 1.0634920634920635, "grad_norm": 0.22082833856477194, "learning_rate": 3.5862417804754684e-05, "loss": 0.3178, "num_tokens": 594823790.0, "step": 3116 }, { "epoch": 1.0638334186721283, "grad_norm": 0.26846525831250106, "learning_rate": 3.5856095093576124e-05, "loss": 0.3343, "num_tokens": 595029050.0, "step": 3117 }, { "epoch": 1.0641747738521932, "grad_norm": 0.2745104730514609, "learning_rate": 3.584977238239757e-05, "loss": 0.2862, "num_tokens": 595184911.0, "step": 3118 }, { "epoch": 1.064516129032258, "grad_norm": 0.2388914643592523, "learning_rate": 3.584344967121902e-05, "loss": 0.2984, "num_tokens": 595371756.0, "step": 3119 }, { "epoch": 1.0648574842123228, "grad_norm": 0.2304913400523901, "learning_rate": 3.5837126960040465e-05, "loss": 0.3229, "num_tokens": 595563679.0, "step": 3120 }, { "epoch": 1.0651988393923877, "grad_norm": 0.2264241507067535, "learning_rate": 3.583080424886192e-05, "loss": 0.3279, "num_tokens": 595762359.0, "step": 3121 }, { "epoch": 1.0655401945724527, "grad_norm": 0.22836344223245536, "learning_rate": 3.582448153768336e-05, "loss": 0.2915, "num_tokens": 595954666.0, "step": 3122 }, { "epoch": 1.0658815497525176, "grad_norm": 0.224053892407339, "learning_rate": 3.5818158826504806e-05, "loss": 0.3147, "num_tokens": 596169048.0, "step": 3123 }, { "epoch": 1.0662229049325824, "grad_norm": 0.2505302899595106, "learning_rate": 3.581183611532625e-05, "loss": 0.3112, "num_tokens": 596361803.0, "step": 3124 }, { "epoch": 1.0665642601126473, "grad_norm": 0.2500094814039549, "learning_rate": 3.58055134041477e-05, "loss": 0.2912, "num_tokens": 596575851.0, "step": 3125 }, { "epoch": 1.066905615292712, "grad_norm": 0.1993578776992888, "learning_rate": 3.579919069296915e-05, "loss": 0.3199, "num_tokens": 596822028.0, "step": 3126 }, { "epoch": 1.067246970472777, "grad_norm": 0.23005298494295512, "learning_rate": 3.5792867981790594e-05, "loss": 0.3389, "num_tokens": 597034238.0, "step": 3127 }, { "epoch": 1.0675883256528418, "grad_norm": 0.26396663844192947, "learning_rate": 3.5786545270612034e-05, "loss": 0.3319, "num_tokens": 597223445.0, "step": 3128 }, { "epoch": 1.0679296808329066, "grad_norm": 0.19038058660353144, "learning_rate": 3.578022255943349e-05, "loss": 0.2754, "num_tokens": 597421842.0, "step": 3129 }, { "epoch": 1.0682710360129715, "grad_norm": 0.24905738979796796, "learning_rate": 3.5773899848254935e-05, "loss": 0.3046, "num_tokens": 597625138.0, "step": 3130 }, { "epoch": 1.0686123911930363, "grad_norm": 0.24648882896550275, "learning_rate": 3.576757713707638e-05, "loss": 0.3108, "num_tokens": 597848558.0, "step": 3131 }, { "epoch": 1.0689537463731011, "grad_norm": 0.21484449089723368, "learning_rate": 3.576125442589783e-05, "loss": 0.3198, "num_tokens": 598037456.0, "step": 3132 }, { "epoch": 1.069295101553166, "grad_norm": 0.26289069216184163, "learning_rate": 3.5754931714719276e-05, "loss": 0.3066, "num_tokens": 598208594.0, "step": 3133 }, { "epoch": 1.0696364567332308, "grad_norm": 0.2441649181494587, "learning_rate": 3.5748609003540716e-05, "loss": 0.3361, "num_tokens": 598407351.0, "step": 3134 }, { "epoch": 1.0699778119132959, "grad_norm": 0.2666920957357544, "learning_rate": 3.574228629236216e-05, "loss": 0.2962, "num_tokens": 598604763.0, "step": 3135 }, { "epoch": 1.0703191670933607, "grad_norm": 0.23701538743075867, "learning_rate": 3.573596358118361e-05, "loss": 0.2858, "num_tokens": 598780304.0, "step": 3136 }, { "epoch": 1.0706605222734256, "grad_norm": 0.20951340153599582, "learning_rate": 3.572964087000506e-05, "loss": 0.3025, "num_tokens": 598973061.0, "step": 3137 }, { "epoch": 1.0710018774534904, "grad_norm": 0.2369343581862474, "learning_rate": 3.572331815882651e-05, "loss": 0.3046, "num_tokens": 599157319.0, "step": 3138 }, { "epoch": 1.0713432326335552, "grad_norm": 0.26020815366472444, "learning_rate": 3.571699544764796e-05, "loss": 0.3107, "num_tokens": 599330193.0, "step": 3139 }, { "epoch": 1.07168458781362, "grad_norm": 0.2518011999899607, "learning_rate": 3.57106727364694e-05, "loss": 0.2902, "num_tokens": 599504377.0, "step": 3140 }, { "epoch": 1.072025942993685, "grad_norm": 0.24089029314652713, "learning_rate": 3.5704350025290845e-05, "loss": 0.307, "num_tokens": 599679641.0, "step": 3141 }, { "epoch": 1.0723672981737498, "grad_norm": 0.24504972261890245, "learning_rate": 3.569802731411229e-05, "loss": 0.3099, "num_tokens": 599834392.0, "step": 3142 }, { "epoch": 1.0727086533538146, "grad_norm": 0.2408766331386124, "learning_rate": 3.569170460293374e-05, "loss": 0.2675, "num_tokens": 600008804.0, "step": 3143 }, { "epoch": 1.0730500085338794, "grad_norm": 0.24231635134246052, "learning_rate": 3.5685381891755186e-05, "loss": 0.3257, "num_tokens": 600224777.0, "step": 3144 }, { "epoch": 1.0733913637139443, "grad_norm": 0.23618879991265485, "learning_rate": 3.567905918057663e-05, "loss": 0.289, "num_tokens": 600400175.0, "step": 3145 }, { "epoch": 1.0737327188940091, "grad_norm": 0.2543744840580058, "learning_rate": 3.567273646939808e-05, "loss": 0.2992, "num_tokens": 600598080.0, "step": 3146 }, { "epoch": 1.074074074074074, "grad_norm": 0.25340690723664144, "learning_rate": 3.566641375821953e-05, "loss": 0.295, "num_tokens": 600773079.0, "step": 3147 }, { "epoch": 1.074415429254139, "grad_norm": 0.2290606459317676, "learning_rate": 3.5660091047040974e-05, "loss": 0.3238, "num_tokens": 600948985.0, "step": 3148 }, { "epoch": 1.0747567844342039, "grad_norm": 0.2923011876083599, "learning_rate": 3.565376833586242e-05, "loss": 0.3466, "num_tokens": 601125690.0, "step": 3149 }, { "epoch": 1.0750981396142687, "grad_norm": 0.23710100992415384, "learning_rate": 3.564744562468387e-05, "loss": 0.3036, "num_tokens": 601309207.0, "step": 3150 }, { "epoch": 1.0754394947943335, "grad_norm": 0.22206217532717235, "learning_rate": 3.5641122913505315e-05, "loss": 0.316, "num_tokens": 601509899.0, "step": 3151 }, { "epoch": 1.0757808499743984, "grad_norm": 0.2562233813271814, "learning_rate": 3.5634800202326755e-05, "loss": 0.3381, "num_tokens": 601722292.0, "step": 3152 }, { "epoch": 1.0761222051544632, "grad_norm": 0.25050891028204564, "learning_rate": 3.56284774911482e-05, "loss": 0.3022, "num_tokens": 601910519.0, "step": 3153 }, { "epoch": 1.076463560334528, "grad_norm": 0.23318446441777804, "learning_rate": 3.562215477996965e-05, "loss": 0.3179, "num_tokens": 602085915.0, "step": 3154 }, { "epoch": 1.076804915514593, "grad_norm": 0.21687274584608213, "learning_rate": 3.56158320687911e-05, "loss": 0.3598, "num_tokens": 602317079.0, "step": 3155 }, { "epoch": 1.0771462706946577, "grad_norm": 0.2907740839557651, "learning_rate": 3.560950935761255e-05, "loss": 0.316, "num_tokens": 602490722.0, "step": 3156 }, { "epoch": 1.0774876258747226, "grad_norm": 0.21306993990104822, "learning_rate": 3.5603186646434e-05, "loss": 0.3025, "num_tokens": 602697632.0, "step": 3157 }, { "epoch": 1.0778289810547874, "grad_norm": 0.2177889754355914, "learning_rate": 3.559686393525544e-05, "loss": 0.3263, "num_tokens": 602876342.0, "step": 3158 }, { "epoch": 1.0781703362348525, "grad_norm": 0.21335882051808971, "learning_rate": 3.5590541224076884e-05, "loss": 0.3219, "num_tokens": 603080088.0, "step": 3159 }, { "epoch": 1.0785116914149173, "grad_norm": 0.2595549924342493, "learning_rate": 3.558421851289833e-05, "loss": 0.294, "num_tokens": 603224298.0, "step": 3160 }, { "epoch": 1.0788530465949822, "grad_norm": 0.2274481292811427, "learning_rate": 3.557789580171978e-05, "loss": 0.2905, "num_tokens": 603421790.0, "step": 3161 }, { "epoch": 1.079194401775047, "grad_norm": 0.21842148496793345, "learning_rate": 3.5571573090541225e-05, "loss": 0.3216, "num_tokens": 603637057.0, "step": 3162 }, { "epoch": 1.0795357569551118, "grad_norm": 0.22108946028869914, "learning_rate": 3.556525037936267e-05, "loss": 0.2805, "num_tokens": 603795516.0, "step": 3163 }, { "epoch": 1.0798771121351767, "grad_norm": 0.2689941435323437, "learning_rate": 3.555892766818412e-05, "loss": 0.2999, "num_tokens": 603986819.0, "step": 3164 }, { "epoch": 1.0802184673152415, "grad_norm": 0.2374704867234364, "learning_rate": 3.5552604957005566e-05, "loss": 0.3123, "num_tokens": 604180165.0, "step": 3165 }, { "epoch": 1.0805598224953064, "grad_norm": 0.21285083121308493, "learning_rate": 3.554628224582701e-05, "loss": 0.3146, "num_tokens": 604365033.0, "step": 3166 }, { "epoch": 1.0809011776753712, "grad_norm": 0.22717686788833594, "learning_rate": 3.553995953464846e-05, "loss": 0.3119, "num_tokens": 604541973.0, "step": 3167 }, { "epoch": 1.081242532855436, "grad_norm": 0.2705897810288428, "learning_rate": 3.553363682346991e-05, "loss": 0.2994, "num_tokens": 604714481.0, "step": 3168 }, { "epoch": 1.0815838880355009, "grad_norm": 0.243282428783332, "learning_rate": 3.5527314112291354e-05, "loss": 0.3388, "num_tokens": 604894820.0, "step": 3169 }, { "epoch": 1.0819252432155657, "grad_norm": 0.22753940057630556, "learning_rate": 3.5520991401112794e-05, "loss": 0.3233, "num_tokens": 605103717.0, "step": 3170 }, { "epoch": 1.0822665983956306, "grad_norm": 0.21471365082157154, "learning_rate": 3.551466868993424e-05, "loss": 0.3292, "num_tokens": 605300121.0, "step": 3171 }, { "epoch": 1.0826079535756956, "grad_norm": 0.28312558525838627, "learning_rate": 3.5508345978755695e-05, "loss": 0.3245, "num_tokens": 605490423.0, "step": 3172 }, { "epoch": 1.0829493087557605, "grad_norm": 0.19722900351637043, "learning_rate": 3.550202326757714e-05, "loss": 0.3098, "num_tokens": 605650948.0, "step": 3173 }, { "epoch": 1.0832906639358253, "grad_norm": 0.2578470217518402, "learning_rate": 3.549570055639859e-05, "loss": 0.2934, "num_tokens": 605836238.0, "step": 3174 }, { "epoch": 1.0836320191158901, "grad_norm": 0.2289120690194824, "learning_rate": 3.5489377845220036e-05, "loss": 0.3139, "num_tokens": 606036001.0, "step": 3175 }, { "epoch": 1.083973374295955, "grad_norm": 0.2033308095376492, "learning_rate": 3.5483055134041476e-05, "loss": 0.2896, "num_tokens": 606224817.0, "step": 3176 }, { "epoch": 1.0843147294760198, "grad_norm": 0.23327549980397677, "learning_rate": 3.547673242286292e-05, "loss": 0.3158, "num_tokens": 606439544.0, "step": 3177 }, { "epoch": 1.0846560846560847, "grad_norm": 0.20288307915089598, "learning_rate": 3.547040971168437e-05, "loss": 0.298, "num_tokens": 606625194.0, "step": 3178 }, { "epoch": 1.0849974398361495, "grad_norm": 0.22779960082304693, "learning_rate": 3.546408700050582e-05, "loss": 0.335, "num_tokens": 606806081.0, "step": 3179 }, { "epoch": 1.0853387950162143, "grad_norm": 0.27157091039397446, "learning_rate": 3.5457764289327264e-05, "loss": 0.3028, "num_tokens": 606974354.0, "step": 3180 }, { "epoch": 1.0856801501962792, "grad_norm": 0.2491158023650684, "learning_rate": 3.545144157814871e-05, "loss": 0.2934, "num_tokens": 607173775.0, "step": 3181 }, { "epoch": 1.086021505376344, "grad_norm": 0.20234243856781184, "learning_rate": 3.544511886697016e-05, "loss": 0.2677, "num_tokens": 607328951.0, "step": 3182 }, { "epoch": 1.0863628605564088, "grad_norm": 0.22981261383173884, "learning_rate": 3.5438796155791605e-05, "loss": 0.3212, "num_tokens": 607509091.0, "step": 3183 }, { "epoch": 1.0867042157364737, "grad_norm": 0.24500647081803353, "learning_rate": 3.543247344461305e-05, "loss": 0.3286, "num_tokens": 607697054.0, "step": 3184 }, { "epoch": 1.0870455709165388, "grad_norm": 0.25310059546969427, "learning_rate": 3.54261507334345e-05, "loss": 0.3163, "num_tokens": 607888409.0, "step": 3185 }, { "epoch": 1.0873869260966036, "grad_norm": 0.2547815242115391, "learning_rate": 3.5419828022255946e-05, "loss": 0.3232, "num_tokens": 608073335.0, "step": 3186 }, { "epoch": 1.0877282812766684, "grad_norm": 0.26489003464992406, "learning_rate": 3.541350531107739e-05, "loss": 0.3099, "num_tokens": 608236807.0, "step": 3187 }, { "epoch": 1.0880696364567333, "grad_norm": 0.2101999025046367, "learning_rate": 3.540718259989883e-05, "loss": 0.2916, "num_tokens": 608408542.0, "step": 3188 }, { "epoch": 1.088410991636798, "grad_norm": 0.21815136811094146, "learning_rate": 3.540085988872029e-05, "loss": 0.337, "num_tokens": 608648465.0, "step": 3189 }, { "epoch": 1.088752346816863, "grad_norm": 0.2515766583384288, "learning_rate": 3.5394537177541734e-05, "loss": 0.3531, "num_tokens": 608837409.0, "step": 3190 }, { "epoch": 1.0890937019969278, "grad_norm": 0.21556705315271324, "learning_rate": 3.538821446636318e-05, "loss": 0.3139, "num_tokens": 609035188.0, "step": 3191 }, { "epoch": 1.0894350571769926, "grad_norm": 0.21904238082858016, "learning_rate": 3.538189175518463e-05, "loss": 0.3336, "num_tokens": 609248428.0, "step": 3192 }, { "epoch": 1.0897764123570575, "grad_norm": 0.25522413349398576, "learning_rate": 3.5375569044006075e-05, "loss": 0.3134, "num_tokens": 609452269.0, "step": 3193 }, { "epoch": 1.0901177675371223, "grad_norm": 0.2234038084471682, "learning_rate": 3.5369246332827515e-05, "loss": 0.3167, "num_tokens": 609630575.0, "step": 3194 }, { "epoch": 1.0904591227171871, "grad_norm": 0.26436960510179747, "learning_rate": 3.536292362164896e-05, "loss": 0.2826, "num_tokens": 609808781.0, "step": 3195 }, { "epoch": 1.0908004778972522, "grad_norm": 0.2207890968018779, "learning_rate": 3.535660091047041e-05, "loss": 0.3262, "num_tokens": 609972406.0, "step": 3196 }, { "epoch": 1.091141833077317, "grad_norm": 0.24070196426055496, "learning_rate": 3.5350278199291856e-05, "loss": 0.3026, "num_tokens": 610151953.0, "step": 3197 }, { "epoch": 1.0914831882573819, "grad_norm": 0.21541797764081405, "learning_rate": 3.534395548811331e-05, "loss": 0.2869, "num_tokens": 610332883.0, "step": 3198 }, { "epoch": 1.0918245434374467, "grad_norm": 0.23725103664972938, "learning_rate": 3.5337632776934756e-05, "loss": 0.298, "num_tokens": 610524403.0, "step": 3199 }, { "epoch": 1.0921658986175116, "grad_norm": 0.2542765393699978, "learning_rate": 3.53313100657562e-05, "loss": 0.3117, "num_tokens": 610708319.0, "step": 3200 }, { "epoch": 1.0925072537975764, "grad_norm": 0.2295545142591383, "learning_rate": 3.5324987354577644e-05, "loss": 0.3409, "num_tokens": 610896697.0, "step": 3201 }, { "epoch": 1.0928486089776412, "grad_norm": 0.2542595918297673, "learning_rate": 3.531866464339909e-05, "loss": 0.2716, "num_tokens": 611042343.0, "step": 3202 }, { "epoch": 1.093189964157706, "grad_norm": 0.2341410856541575, "learning_rate": 3.531234193222054e-05, "loss": 0.3225, "num_tokens": 611254560.0, "step": 3203 }, { "epoch": 1.093531319337771, "grad_norm": 0.21911640370157653, "learning_rate": 3.5306019221041985e-05, "loss": 0.2996, "num_tokens": 611429711.0, "step": 3204 }, { "epoch": 1.0938726745178358, "grad_norm": 0.20467826132772268, "learning_rate": 3.529969650986343e-05, "loss": 0.2988, "num_tokens": 611650310.0, "step": 3205 }, { "epoch": 1.0942140296979006, "grad_norm": 0.228428278903732, "learning_rate": 3.529337379868488e-05, "loss": 0.2783, "num_tokens": 611816765.0, "step": 3206 }, { "epoch": 1.0945553848779654, "grad_norm": 0.23073016273741448, "learning_rate": 3.5287051087506326e-05, "loss": 0.3312, "num_tokens": 612024931.0, "step": 3207 }, { "epoch": 1.0948967400580303, "grad_norm": 0.24740139786662999, "learning_rate": 3.528072837632777e-05, "loss": 0.3048, "num_tokens": 612196196.0, "step": 3208 }, { "epoch": 1.0952380952380953, "grad_norm": 0.28095862029929813, "learning_rate": 3.527440566514922e-05, "loss": 0.304, "num_tokens": 612388120.0, "step": 3209 }, { "epoch": 1.0955794504181602, "grad_norm": 0.209818841620548, "learning_rate": 3.5268082953970667e-05, "loss": 0.3046, "num_tokens": 612604676.0, "step": 3210 }, { "epoch": 1.095920805598225, "grad_norm": 0.21289676030889823, "learning_rate": 3.5261760242792114e-05, "loss": 0.3114, "num_tokens": 612818687.0, "step": 3211 }, { "epoch": 1.0962621607782899, "grad_norm": 0.22155367700040968, "learning_rate": 3.5255437531613554e-05, "loss": 0.321, "num_tokens": 613010169.0, "step": 3212 }, { "epoch": 1.0966035159583547, "grad_norm": 0.23747307279670019, "learning_rate": 3.5249114820435e-05, "loss": 0.3233, "num_tokens": 613198184.0, "step": 3213 }, { "epoch": 1.0969448711384195, "grad_norm": 0.24064261126971753, "learning_rate": 3.524279210925645e-05, "loss": 0.3477, "num_tokens": 613384126.0, "step": 3214 }, { "epoch": 1.0972862263184844, "grad_norm": 0.25519813846400796, "learning_rate": 3.52364693980779e-05, "loss": 0.2954, "num_tokens": 613560083.0, "step": 3215 }, { "epoch": 1.0976275814985492, "grad_norm": 0.20911385515067626, "learning_rate": 3.523014668689935e-05, "loss": 0.3144, "num_tokens": 613741659.0, "step": 3216 }, { "epoch": 1.097968936678614, "grad_norm": 0.25917853567562504, "learning_rate": 3.5223823975720795e-05, "loss": 0.306, "num_tokens": 613920626.0, "step": 3217 }, { "epoch": 1.098310291858679, "grad_norm": 0.2077790871169598, "learning_rate": 3.5217501264542236e-05, "loss": 0.2946, "num_tokens": 614106564.0, "step": 3218 }, { "epoch": 1.0986516470387437, "grad_norm": 0.21871154771383877, "learning_rate": 3.521117855336368e-05, "loss": 0.3138, "num_tokens": 614321434.0, "step": 3219 }, { "epoch": 1.0989930022188086, "grad_norm": 0.18775094069054368, "learning_rate": 3.520485584218513e-05, "loss": 0.31, "num_tokens": 614572478.0, "step": 3220 }, { "epoch": 1.0993343573988734, "grad_norm": 0.20280154571189218, "learning_rate": 3.5198533131006577e-05, "loss": 0.2875, "num_tokens": 614750577.0, "step": 3221 }, { "epoch": 1.0996757125789385, "grad_norm": 0.22900445437952394, "learning_rate": 3.5192210419828024e-05, "loss": 0.3184, "num_tokens": 615003180.0, "step": 3222 }, { "epoch": 1.1000170677590033, "grad_norm": 0.2238237930972459, "learning_rate": 3.518588770864947e-05, "loss": 0.2843, "num_tokens": 615184112.0, "step": 3223 }, { "epoch": 1.1003584229390682, "grad_norm": 0.20966802806944113, "learning_rate": 3.517956499747092e-05, "loss": 0.2643, "num_tokens": 615367250.0, "step": 3224 }, { "epoch": 1.100699778119133, "grad_norm": 0.20225838298692844, "learning_rate": 3.5173242286292365e-05, "loss": 0.2968, "num_tokens": 615572390.0, "step": 3225 }, { "epoch": 1.1010411332991978, "grad_norm": 0.24271433569579554, "learning_rate": 3.516691957511381e-05, "loss": 0.3225, "num_tokens": 615765956.0, "step": 3226 }, { "epoch": 1.1013824884792627, "grad_norm": 0.23246874798561884, "learning_rate": 3.516059686393526e-05, "loss": 0.327, "num_tokens": 615953566.0, "step": 3227 }, { "epoch": 1.1017238436593275, "grad_norm": 0.2281600611783141, "learning_rate": 3.5154274152756705e-05, "loss": 0.2799, "num_tokens": 616116963.0, "step": 3228 }, { "epoch": 1.1020651988393924, "grad_norm": 0.22209072602953697, "learning_rate": 3.514795144157815e-05, "loss": 0.3084, "num_tokens": 616325289.0, "step": 3229 }, { "epoch": 1.1024065540194572, "grad_norm": 0.21369217453691347, "learning_rate": 3.514162873039959e-05, "loss": 0.3189, "num_tokens": 616538381.0, "step": 3230 }, { "epoch": 1.102747909199522, "grad_norm": 0.20652871948698617, "learning_rate": 3.513530601922104e-05, "loss": 0.3162, "num_tokens": 616761561.0, "step": 3231 }, { "epoch": 1.1030892643795869, "grad_norm": 0.2327358219885493, "learning_rate": 3.512898330804249e-05, "loss": 0.3193, "num_tokens": 616931074.0, "step": 3232 }, { "epoch": 1.1034306195596517, "grad_norm": 0.20778354367022903, "learning_rate": 3.512266059686394e-05, "loss": 0.3406, "num_tokens": 617144323.0, "step": 3233 }, { "epoch": 1.1037719747397168, "grad_norm": 0.2449312503647123, "learning_rate": 3.511633788568539e-05, "loss": 0.2965, "num_tokens": 617301453.0, "step": 3234 }, { "epoch": 1.1041133299197816, "grad_norm": 0.3027988229019065, "learning_rate": 3.5110015174506834e-05, "loss": 0.2951, "num_tokens": 617485288.0, "step": 3235 }, { "epoch": 1.1044546850998465, "grad_norm": 0.2153628814015993, "learning_rate": 3.5103692463328275e-05, "loss": 0.3308, "num_tokens": 617682259.0, "step": 3236 }, { "epoch": 1.1047960402799113, "grad_norm": 0.2234960134474642, "learning_rate": 3.509736975214972e-05, "loss": 0.2957, "num_tokens": 617848847.0, "step": 3237 }, { "epoch": 1.1051373954599761, "grad_norm": 0.22960631087176808, "learning_rate": 3.509104704097117e-05, "loss": 0.2902, "num_tokens": 618034759.0, "step": 3238 }, { "epoch": 1.105478750640041, "grad_norm": 0.23477816858075218, "learning_rate": 3.5084724329792615e-05, "loss": 0.2888, "num_tokens": 618175550.0, "step": 3239 }, { "epoch": 1.1058201058201058, "grad_norm": 0.237376872417835, "learning_rate": 3.507840161861406e-05, "loss": 0.284, "num_tokens": 618334020.0, "step": 3240 }, { "epoch": 1.1061614610001707, "grad_norm": 0.30742758688170807, "learning_rate": 3.507207890743551e-05, "loss": 0.3372, "num_tokens": 618501165.0, "step": 3241 }, { "epoch": 1.1065028161802355, "grad_norm": 0.20355375523587452, "learning_rate": 3.5065756196256956e-05, "loss": 0.296, "num_tokens": 618727249.0, "step": 3242 }, { "epoch": 1.1068441713603003, "grad_norm": 0.22586507971806655, "learning_rate": 3.5059433485078403e-05, "loss": 0.3277, "num_tokens": 618927981.0, "step": 3243 }, { "epoch": 1.1071855265403652, "grad_norm": 0.20683318919981095, "learning_rate": 3.505311077389985e-05, "loss": 0.3266, "num_tokens": 619125488.0, "step": 3244 }, { "epoch": 1.10752688172043, "grad_norm": 0.22963132073782616, "learning_rate": 3.50467880627213e-05, "loss": 0.2918, "num_tokens": 619346405.0, "step": 3245 }, { "epoch": 1.107868236900495, "grad_norm": 0.2272835740343239, "learning_rate": 3.5040465351542744e-05, "loss": 0.3259, "num_tokens": 619554195.0, "step": 3246 }, { "epoch": 1.10820959208056, "grad_norm": 0.19995520482179824, "learning_rate": 3.503414264036419e-05, "loss": 0.2981, "num_tokens": 619763256.0, "step": 3247 }, { "epoch": 1.1085509472606248, "grad_norm": 0.23904876501672967, "learning_rate": 3.502781992918563e-05, "loss": 0.3008, "num_tokens": 619941351.0, "step": 3248 }, { "epoch": 1.1088923024406896, "grad_norm": 0.2554987889251196, "learning_rate": 3.5021497218007085e-05, "loss": 0.2989, "num_tokens": 620129377.0, "step": 3249 }, { "epoch": 1.1092336576207544, "grad_norm": 0.22892107058861505, "learning_rate": 3.501517450682853e-05, "loss": 0.316, "num_tokens": 620360986.0, "step": 3250 }, { "epoch": 1.1095750128008193, "grad_norm": 0.220432662048042, "learning_rate": 3.500885179564998e-05, "loss": 0.2735, "num_tokens": 620504824.0, "step": 3251 }, { "epoch": 1.109916367980884, "grad_norm": 0.2413171225788163, "learning_rate": 3.5002529084471426e-05, "loss": 0.309, "num_tokens": 620674063.0, "step": 3252 }, { "epoch": 1.110257723160949, "grad_norm": 0.2592053904093019, "learning_rate": 3.4996206373292866e-05, "loss": 0.3238, "num_tokens": 620850712.0, "step": 3253 }, { "epoch": 1.1105990783410138, "grad_norm": 0.21148872401700955, "learning_rate": 3.4989883662114313e-05, "loss": 0.311, "num_tokens": 621058242.0, "step": 3254 }, { "epoch": 1.1109404335210786, "grad_norm": 0.2545481313831862, "learning_rate": 3.498356095093576e-05, "loss": 0.2961, "num_tokens": 621222974.0, "step": 3255 }, { "epoch": 1.1112817887011435, "grad_norm": 0.20667125677596157, "learning_rate": 3.497723823975721e-05, "loss": 0.3132, "num_tokens": 621437986.0, "step": 3256 }, { "epoch": 1.1116231438812083, "grad_norm": 0.2506871593061072, "learning_rate": 3.4970915528578654e-05, "loss": 0.3218, "num_tokens": 621615980.0, "step": 3257 }, { "epoch": 1.1119644990612731, "grad_norm": 0.23435633635996692, "learning_rate": 3.49645928174001e-05, "loss": 0.289, "num_tokens": 621795809.0, "step": 3258 }, { "epoch": 1.1123058542413382, "grad_norm": 0.2374789660618789, "learning_rate": 3.495827010622155e-05, "loss": 0.289, "num_tokens": 621979493.0, "step": 3259 }, { "epoch": 1.112647209421403, "grad_norm": 0.2130725136069637, "learning_rate": 3.4951947395042995e-05, "loss": 0.2983, "num_tokens": 622180469.0, "step": 3260 }, { "epoch": 1.112988564601468, "grad_norm": 0.22939990382249062, "learning_rate": 3.494562468386444e-05, "loss": 0.3004, "num_tokens": 622351549.0, "step": 3261 }, { "epoch": 1.1133299197815327, "grad_norm": 0.24422465019504522, "learning_rate": 3.493930197268589e-05, "loss": 0.2996, "num_tokens": 622546324.0, "step": 3262 }, { "epoch": 1.1136712749615976, "grad_norm": 0.23798140700484002, "learning_rate": 3.4932979261507336e-05, "loss": 0.3182, "num_tokens": 622710160.0, "step": 3263 }, { "epoch": 1.1140126301416624, "grad_norm": 0.21247240868037454, "learning_rate": 3.492665655032878e-05, "loss": 0.3161, "num_tokens": 622947543.0, "step": 3264 }, { "epoch": 1.1143539853217272, "grad_norm": 0.25219793924454725, "learning_rate": 3.4920333839150223e-05, "loss": 0.3422, "num_tokens": 623122793.0, "step": 3265 }, { "epoch": 1.114695340501792, "grad_norm": 0.22096797385081934, "learning_rate": 3.491401112797168e-05, "loss": 0.3132, "num_tokens": 623320251.0, "step": 3266 }, { "epoch": 1.115036695681857, "grad_norm": 0.21886374913763101, "learning_rate": 3.4907688416793124e-05, "loss": 0.3035, "num_tokens": 623563132.0, "step": 3267 }, { "epoch": 1.1153780508619218, "grad_norm": 0.21106134048529027, "learning_rate": 3.490136570561457e-05, "loss": 0.3255, "num_tokens": 623798615.0, "step": 3268 }, { "epoch": 1.1157194060419866, "grad_norm": 0.2041782608798419, "learning_rate": 3.489504299443602e-05, "loss": 0.3003, "num_tokens": 623972222.0, "step": 3269 }, { "epoch": 1.1160607612220514, "grad_norm": 0.22889317893324201, "learning_rate": 3.4888720283257465e-05, "loss": 0.3, "num_tokens": 624170936.0, "step": 3270 }, { "epoch": 1.1164021164021163, "grad_norm": 0.2480473974053294, "learning_rate": 3.4882397572078905e-05, "loss": 0.3358, "num_tokens": 624372021.0, "step": 3271 }, { "epoch": 1.1167434715821813, "grad_norm": 0.2047611873620189, "learning_rate": 3.487607486090035e-05, "loss": 0.3299, "num_tokens": 624562618.0, "step": 3272 }, { "epoch": 1.1170848267622462, "grad_norm": 0.38203518665423863, "learning_rate": 3.48697521497218e-05, "loss": 0.321, "num_tokens": 624795922.0, "step": 3273 }, { "epoch": 1.117426181942311, "grad_norm": 0.20473768088660685, "learning_rate": 3.4863429438543246e-05, "loss": 0.299, "num_tokens": 624971453.0, "step": 3274 }, { "epoch": 1.1177675371223759, "grad_norm": 0.22820109704393646, "learning_rate": 3.48571067273647e-05, "loss": 0.3168, "num_tokens": 625142858.0, "step": 3275 }, { "epoch": 1.1181088923024407, "grad_norm": 0.24762909111802314, "learning_rate": 3.485078401618615e-05, "loss": 0.2925, "num_tokens": 625325607.0, "step": 3276 }, { "epoch": 1.1184502474825055, "grad_norm": 0.17785995932603574, "learning_rate": 3.484446130500759e-05, "loss": 0.3122, "num_tokens": 625536836.0, "step": 3277 }, { "epoch": 1.1187916026625704, "grad_norm": 0.21233005346180847, "learning_rate": 3.4838138593829034e-05, "loss": 0.3209, "num_tokens": 625775061.0, "step": 3278 }, { "epoch": 1.1191329578426352, "grad_norm": 0.2271245489317066, "learning_rate": 3.483181588265048e-05, "loss": 0.3227, "num_tokens": 625944767.0, "step": 3279 }, { "epoch": 1.1194743130227, "grad_norm": 0.2274091173203267, "learning_rate": 3.482549317147193e-05, "loss": 0.2926, "num_tokens": 626118205.0, "step": 3280 }, { "epoch": 1.119815668202765, "grad_norm": 0.207558404289179, "learning_rate": 3.4819170460293375e-05, "loss": 0.2974, "num_tokens": 626307594.0, "step": 3281 }, { "epoch": 1.1201570233828297, "grad_norm": 0.2477025944837203, "learning_rate": 3.481284774911482e-05, "loss": 0.2809, "num_tokens": 626501095.0, "step": 3282 }, { "epoch": 1.1204983785628948, "grad_norm": 0.2568766661268885, "learning_rate": 3.480652503793627e-05, "loss": 0.3091, "num_tokens": 626703573.0, "step": 3283 }, { "epoch": 1.1208397337429596, "grad_norm": 0.2234350453233713, "learning_rate": 3.4800202326757716e-05, "loss": 0.3025, "num_tokens": 626906771.0, "step": 3284 }, { "epoch": 1.1211810889230245, "grad_norm": 0.20815639679116404, "learning_rate": 3.479387961557916e-05, "loss": 0.3255, "num_tokens": 627104469.0, "step": 3285 }, { "epoch": 1.1215224441030893, "grad_norm": 0.253657416458569, "learning_rate": 3.478755690440061e-05, "loss": 0.3056, "num_tokens": 627270479.0, "step": 3286 }, { "epoch": 1.1218637992831542, "grad_norm": 0.22699920619118621, "learning_rate": 3.478123419322206e-05, "loss": 0.287, "num_tokens": 627434502.0, "step": 3287 }, { "epoch": 1.122205154463219, "grad_norm": 0.2493254270662768, "learning_rate": 3.4774911482043504e-05, "loss": 0.3163, "num_tokens": 627637704.0, "step": 3288 }, { "epoch": 1.1225465096432838, "grad_norm": 0.22856103923459223, "learning_rate": 3.4768588770864944e-05, "loss": 0.3065, "num_tokens": 627849213.0, "step": 3289 }, { "epoch": 1.1228878648233487, "grad_norm": 0.20224174261117656, "learning_rate": 3.476226605968639e-05, "loss": 0.3403, "num_tokens": 628047006.0, "step": 3290 }, { "epoch": 1.1232292200034135, "grad_norm": 0.24869863188350974, "learning_rate": 3.475594334850784e-05, "loss": 0.315, "num_tokens": 628232277.0, "step": 3291 }, { "epoch": 1.1235705751834784, "grad_norm": 0.2448675430224728, "learning_rate": 3.474962063732929e-05, "loss": 0.3208, "num_tokens": 628427803.0, "step": 3292 }, { "epoch": 1.1239119303635432, "grad_norm": 0.22669286261873112, "learning_rate": 3.474329792615074e-05, "loss": 0.2825, "num_tokens": 628603422.0, "step": 3293 }, { "epoch": 1.124253285543608, "grad_norm": 0.22170640982014136, "learning_rate": 3.4736975214972186e-05, "loss": 0.3091, "num_tokens": 628789487.0, "step": 3294 }, { "epoch": 1.1245946407236729, "grad_norm": 0.2311297359472868, "learning_rate": 3.4730652503793626e-05, "loss": 0.2948, "num_tokens": 628966909.0, "step": 3295 }, { "epoch": 1.124935995903738, "grad_norm": 0.23716926614289555, "learning_rate": 3.472432979261507e-05, "loss": 0.3005, "num_tokens": 629187469.0, "step": 3296 }, { "epoch": 1.1252773510838028, "grad_norm": 0.20478780956043316, "learning_rate": 3.471800708143652e-05, "loss": 0.3093, "num_tokens": 629409029.0, "step": 3297 }, { "epoch": 1.1256187062638676, "grad_norm": 0.21672410868708278, "learning_rate": 3.471168437025797e-05, "loss": 0.2922, "num_tokens": 629566253.0, "step": 3298 }, { "epoch": 1.1259600614439325, "grad_norm": 0.22841833659022712, "learning_rate": 3.4705361659079414e-05, "loss": 0.2932, "num_tokens": 629723705.0, "step": 3299 }, { "epoch": 1.1263014166239973, "grad_norm": 0.2999738711290827, "learning_rate": 3.469903894790086e-05, "loss": 0.3633, "num_tokens": 629922112.0, "step": 3300 }, { "epoch": 1.1266427718040621, "grad_norm": 0.23387220227518127, "learning_rate": 3.469271623672231e-05, "loss": 0.3215, "num_tokens": 630105236.0, "step": 3301 }, { "epoch": 1.126984126984127, "grad_norm": 0.237944993238049, "learning_rate": 3.4686393525543755e-05, "loss": 0.3337, "num_tokens": 630285159.0, "step": 3302 }, { "epoch": 1.1273254821641918, "grad_norm": 0.2335531760448199, "learning_rate": 3.46800708143652e-05, "loss": 0.304, "num_tokens": 630473077.0, "step": 3303 }, { "epoch": 1.1276668373442567, "grad_norm": 0.24100845600738938, "learning_rate": 3.467374810318665e-05, "loss": 0.3068, "num_tokens": 630644522.0, "step": 3304 }, { "epoch": 1.1280081925243215, "grad_norm": 0.22668761046232638, "learning_rate": 3.4667425392008096e-05, "loss": 0.3147, "num_tokens": 630845334.0, "step": 3305 }, { "epoch": 1.1283495477043863, "grad_norm": 0.24416698322088115, "learning_rate": 3.466110268082954e-05, "loss": 0.3102, "num_tokens": 631036584.0, "step": 3306 }, { "epoch": 1.1286909028844512, "grad_norm": 0.2243578295782773, "learning_rate": 3.465477996965098e-05, "loss": 0.3186, "num_tokens": 631221826.0, "step": 3307 }, { "epoch": 1.129032258064516, "grad_norm": 0.25537726437726427, "learning_rate": 3.464845725847243e-05, "loss": 0.3195, "num_tokens": 631383590.0, "step": 3308 }, { "epoch": 1.129373613244581, "grad_norm": 0.2329076270909501, "learning_rate": 3.4642134547293884e-05, "loss": 0.2864, "num_tokens": 631572495.0, "step": 3309 }, { "epoch": 1.129714968424646, "grad_norm": 0.21193604536894345, "learning_rate": 3.463581183611533e-05, "loss": 0.3042, "num_tokens": 631786429.0, "step": 3310 }, { "epoch": 1.1300563236047108, "grad_norm": 0.25729147079501147, "learning_rate": 3.462948912493678e-05, "loss": 0.3047, "num_tokens": 631939882.0, "step": 3311 }, { "epoch": 1.1303976787847756, "grad_norm": 0.25139689955400224, "learning_rate": 3.4623166413758225e-05, "loss": 0.2966, "num_tokens": 632084514.0, "step": 3312 }, { "epoch": 1.1307390339648404, "grad_norm": 0.23951342650992025, "learning_rate": 3.4616843702579665e-05, "loss": 0.2871, "num_tokens": 632288258.0, "step": 3313 }, { "epoch": 1.1310803891449053, "grad_norm": 0.21832247696328985, "learning_rate": 3.461052099140111e-05, "loss": 0.3016, "num_tokens": 632460819.0, "step": 3314 }, { "epoch": 1.1314217443249701, "grad_norm": 0.2392131323538433, "learning_rate": 3.460419828022256e-05, "loss": 0.2873, "num_tokens": 632634376.0, "step": 3315 }, { "epoch": 1.131763099505035, "grad_norm": 0.24586171954218403, "learning_rate": 3.4597875569044006e-05, "loss": 0.2922, "num_tokens": 632853274.0, "step": 3316 }, { "epoch": 1.1321044546850998, "grad_norm": 0.18921968056380467, "learning_rate": 3.459155285786545e-05, "loss": 0.2829, "num_tokens": 633014349.0, "step": 3317 }, { "epoch": 1.1324458098651646, "grad_norm": 0.24558767836588963, "learning_rate": 3.45852301466869e-05, "loss": 0.2599, "num_tokens": 633171555.0, "step": 3318 }, { "epoch": 1.1327871650452295, "grad_norm": 0.2329760042025088, "learning_rate": 3.457890743550835e-05, "loss": 0.2744, "num_tokens": 633360727.0, "step": 3319 }, { "epoch": 1.1331285202252945, "grad_norm": 0.2180382588424119, "learning_rate": 3.4572584724329794e-05, "loss": 0.3058, "num_tokens": 633541307.0, "step": 3320 }, { "epoch": 1.1334698754053594, "grad_norm": 0.22878471534214595, "learning_rate": 3.456626201315124e-05, "loss": 0.2845, "num_tokens": 633673793.0, "step": 3321 }, { "epoch": 1.1338112305854242, "grad_norm": 0.9964595645305091, "learning_rate": 3.455993930197269e-05, "loss": 0.3013, "num_tokens": 633855234.0, "step": 3322 }, { "epoch": 1.134152585765489, "grad_norm": 0.23354153934921593, "learning_rate": 3.4553616590794135e-05, "loss": 0.2864, "num_tokens": 634023967.0, "step": 3323 }, { "epoch": 1.134493940945554, "grad_norm": 0.22739871678001314, "learning_rate": 3.454729387961558e-05, "loss": 0.3209, "num_tokens": 634247534.0, "step": 3324 }, { "epoch": 1.1348352961256187, "grad_norm": 0.20844670837785093, "learning_rate": 3.454097116843702e-05, "loss": 0.3066, "num_tokens": 634422310.0, "step": 3325 }, { "epoch": 1.1351766513056836, "grad_norm": 0.2557940845147227, "learning_rate": 3.4534648457258476e-05, "loss": 0.282, "num_tokens": 634581410.0, "step": 3326 }, { "epoch": 1.1355180064857484, "grad_norm": 0.23731195424773013, "learning_rate": 3.452832574607992e-05, "loss": 0.3236, "num_tokens": 634773264.0, "step": 3327 }, { "epoch": 1.1358593616658132, "grad_norm": 0.21972777693535175, "learning_rate": 3.452200303490137e-05, "loss": 0.2911, "num_tokens": 634927977.0, "step": 3328 }, { "epoch": 1.136200716845878, "grad_norm": 0.242746504508239, "learning_rate": 3.451568032372282e-05, "loss": 0.3126, "num_tokens": 635099728.0, "step": 3329 }, { "epoch": 1.136542072025943, "grad_norm": 0.22974141487005714, "learning_rate": 3.4509357612544264e-05, "loss": 0.3123, "num_tokens": 635298553.0, "step": 3330 }, { "epoch": 1.1368834272060078, "grad_norm": 0.2383068340442954, "learning_rate": 3.4503034901365704e-05, "loss": 0.3173, "num_tokens": 635478335.0, "step": 3331 }, { "epoch": 1.1372247823860726, "grad_norm": 0.232396482787736, "learning_rate": 3.449671219018715e-05, "loss": 0.3177, "num_tokens": 635671509.0, "step": 3332 }, { "epoch": 1.1375661375661377, "grad_norm": 0.23108863099043195, "learning_rate": 3.44903894790086e-05, "loss": 0.3153, "num_tokens": 635849258.0, "step": 3333 }, { "epoch": 1.1379074927462025, "grad_norm": 0.24621685880012012, "learning_rate": 3.4484066767830045e-05, "loss": 0.3044, "num_tokens": 636018755.0, "step": 3334 }, { "epoch": 1.1382488479262673, "grad_norm": 0.2562323146088071, "learning_rate": 3.447774405665149e-05, "loss": 0.2957, "num_tokens": 636192419.0, "step": 3335 }, { "epoch": 1.1385902031063322, "grad_norm": 0.22866245792239587, "learning_rate": 3.4471421345472946e-05, "loss": 0.3034, "num_tokens": 636386336.0, "step": 3336 }, { "epoch": 1.138931558286397, "grad_norm": 0.23621288044638855, "learning_rate": 3.4465098634294386e-05, "loss": 0.3288, "num_tokens": 636561820.0, "step": 3337 }, { "epoch": 1.1392729134664619, "grad_norm": 0.2635263935385776, "learning_rate": 3.445877592311583e-05, "loss": 0.325, "num_tokens": 636744582.0, "step": 3338 }, { "epoch": 1.1396142686465267, "grad_norm": 0.2282778508637764, "learning_rate": 3.445245321193728e-05, "loss": 0.3009, "num_tokens": 636940040.0, "step": 3339 }, { "epoch": 1.1399556238265915, "grad_norm": 0.23164177395871913, "learning_rate": 3.444613050075873e-05, "loss": 0.3001, "num_tokens": 637121909.0, "step": 3340 }, { "epoch": 1.1402969790066564, "grad_norm": 0.2185293869872514, "learning_rate": 3.4439807789580174e-05, "loss": 0.3176, "num_tokens": 637330137.0, "step": 3341 }, { "epoch": 1.1406383341867212, "grad_norm": 0.22709918053005904, "learning_rate": 3.443348507840162e-05, "loss": 0.2972, "num_tokens": 637517900.0, "step": 3342 }, { "epoch": 1.140979689366786, "grad_norm": 0.21184819974240918, "learning_rate": 3.442716236722307e-05, "loss": 0.2974, "num_tokens": 637724889.0, "step": 3343 }, { "epoch": 1.141321044546851, "grad_norm": 0.2356841861870132, "learning_rate": 3.4420839656044515e-05, "loss": 0.3335, "num_tokens": 637927952.0, "step": 3344 }, { "epoch": 1.1416623997269157, "grad_norm": 0.22992190579406865, "learning_rate": 3.441451694486596e-05, "loss": 0.3193, "num_tokens": 638102424.0, "step": 3345 }, { "epoch": 1.1420037549069808, "grad_norm": 0.24640862750135253, "learning_rate": 3.440819423368741e-05, "loss": 0.3061, "num_tokens": 638312200.0, "step": 3346 }, { "epoch": 1.1423451100870456, "grad_norm": 0.23318618796267518, "learning_rate": 3.4401871522508856e-05, "loss": 0.3004, "num_tokens": 638494441.0, "step": 3347 }, { "epoch": 1.1426864652671105, "grad_norm": 0.2583313370524409, "learning_rate": 3.43955488113303e-05, "loss": 0.2933, "num_tokens": 638654248.0, "step": 3348 }, { "epoch": 1.1430278204471753, "grad_norm": 0.23007753598938396, "learning_rate": 3.438922610015174e-05, "loss": 0.3255, "num_tokens": 638878397.0, "step": 3349 }, { "epoch": 1.1433691756272402, "grad_norm": 0.23749793669527297, "learning_rate": 3.438290338897319e-05, "loss": 0.3, "num_tokens": 639078474.0, "step": 3350 }, { "epoch": 1.143710530807305, "grad_norm": 0.23357516695282496, "learning_rate": 3.437658067779464e-05, "loss": 0.3199, "num_tokens": 639296937.0, "step": 3351 }, { "epoch": 1.1440518859873698, "grad_norm": 0.211977058827581, "learning_rate": 3.437025796661609e-05, "loss": 0.3323, "num_tokens": 639522631.0, "step": 3352 }, { "epoch": 1.1443932411674347, "grad_norm": 0.24950247139848428, "learning_rate": 3.436393525543754e-05, "loss": 0.3267, "num_tokens": 639709210.0, "step": 3353 }, { "epoch": 1.1447345963474995, "grad_norm": 0.2092908697494468, "learning_rate": 3.4357612544258985e-05, "loss": 0.3011, "num_tokens": 639902515.0, "step": 3354 }, { "epoch": 1.1450759515275644, "grad_norm": 0.2281204051276, "learning_rate": 3.4351289833080425e-05, "loss": 0.3239, "num_tokens": 640087563.0, "step": 3355 }, { "epoch": 1.1454173067076292, "grad_norm": 0.216898105465086, "learning_rate": 3.434496712190187e-05, "loss": 0.3055, "num_tokens": 640281378.0, "step": 3356 }, { "epoch": 1.1457586618876943, "grad_norm": 0.2209400099534071, "learning_rate": 3.433864441072332e-05, "loss": 0.3183, "num_tokens": 640464932.0, "step": 3357 }, { "epoch": 1.1461000170677589, "grad_norm": 0.28895792094318257, "learning_rate": 3.4332321699544766e-05, "loss": 0.2983, "num_tokens": 640641917.0, "step": 3358 }, { "epoch": 1.146441372247824, "grad_norm": 0.22213144383918465, "learning_rate": 3.432599898836621e-05, "loss": 0.298, "num_tokens": 640834885.0, "step": 3359 }, { "epoch": 1.1467827274278888, "grad_norm": 0.21425782701922969, "learning_rate": 3.431967627718766e-05, "loss": 0.3018, "num_tokens": 641004761.0, "step": 3360 }, { "epoch": 1.1471240826079536, "grad_norm": 0.2444267671108613, "learning_rate": 3.431335356600911e-05, "loss": 0.3211, "num_tokens": 641257735.0, "step": 3361 }, { "epoch": 1.1474654377880185, "grad_norm": 0.20810666760200064, "learning_rate": 3.4307030854830554e-05, "loss": 0.31, "num_tokens": 641426735.0, "step": 3362 }, { "epoch": 1.1478067929680833, "grad_norm": 0.24652354201646054, "learning_rate": 3.4300708143652e-05, "loss": 0.2907, "num_tokens": 641594874.0, "step": 3363 }, { "epoch": 1.1481481481481481, "grad_norm": 0.2135786589088307, "learning_rate": 3.429438543247345e-05, "loss": 0.2999, "num_tokens": 641791883.0, "step": 3364 }, { "epoch": 1.148489503328213, "grad_norm": 0.25850755633519246, "learning_rate": 3.4288062721294895e-05, "loss": 0.3112, "num_tokens": 642008154.0, "step": 3365 }, { "epoch": 1.1488308585082778, "grad_norm": 0.21907408657166838, "learning_rate": 3.428174001011634e-05, "loss": 0.3215, "num_tokens": 642211600.0, "step": 3366 }, { "epoch": 1.1491722136883427, "grad_norm": 0.20396751183252523, "learning_rate": 3.427541729893778e-05, "loss": 0.2921, "num_tokens": 642402907.0, "step": 3367 }, { "epoch": 1.1495135688684075, "grad_norm": 0.2408408262288343, "learning_rate": 3.426909458775923e-05, "loss": 0.297, "num_tokens": 642577367.0, "step": 3368 }, { "epoch": 1.1498549240484723, "grad_norm": 0.2500812431000802, "learning_rate": 3.426277187658068e-05, "loss": 0.3161, "num_tokens": 642783305.0, "step": 3369 }, { "epoch": 1.1501962792285374, "grad_norm": 0.2331587100154555, "learning_rate": 3.425644916540213e-05, "loss": 0.3171, "num_tokens": 642998185.0, "step": 3370 }, { "epoch": 1.1505376344086022, "grad_norm": 0.24864760777920475, "learning_rate": 3.4250126454223577e-05, "loss": 0.3155, "num_tokens": 643175481.0, "step": 3371 }, { "epoch": 1.150878989588667, "grad_norm": 0.27332727223497455, "learning_rate": 3.4243803743045024e-05, "loss": 0.3017, "num_tokens": 643361390.0, "step": 3372 }, { "epoch": 1.151220344768732, "grad_norm": 0.20100768488319098, "learning_rate": 3.4237481031866464e-05, "loss": 0.334, "num_tokens": 643573565.0, "step": 3373 }, { "epoch": 1.1515616999487968, "grad_norm": 0.21971069176231295, "learning_rate": 3.423115832068791e-05, "loss": 0.3451, "num_tokens": 643781725.0, "step": 3374 }, { "epoch": 1.1519030551288616, "grad_norm": 0.26283637485570976, "learning_rate": 3.422483560950936e-05, "loss": 0.3193, "num_tokens": 643981175.0, "step": 3375 }, { "epoch": 1.1522444103089264, "grad_norm": 0.21591818730441667, "learning_rate": 3.4218512898330805e-05, "loss": 0.3185, "num_tokens": 644195693.0, "step": 3376 }, { "epoch": 1.1525857654889913, "grad_norm": 0.221133506058564, "learning_rate": 3.421219018715225e-05, "loss": 0.3273, "num_tokens": 644409200.0, "step": 3377 }, { "epoch": 1.1529271206690561, "grad_norm": 0.2082889113209553, "learning_rate": 3.42058674759737e-05, "loss": 0.3117, "num_tokens": 644608744.0, "step": 3378 }, { "epoch": 1.153268475849121, "grad_norm": 0.2221661253472987, "learning_rate": 3.4199544764795146e-05, "loss": 0.2781, "num_tokens": 644793149.0, "step": 3379 }, { "epoch": 1.1536098310291858, "grad_norm": 0.22209027555044863, "learning_rate": 3.419322205361659e-05, "loss": 0.2903, "num_tokens": 644998530.0, "step": 3380 }, { "epoch": 1.1539511862092506, "grad_norm": 0.21564228289470946, "learning_rate": 3.418689934243804e-05, "loss": 0.3064, "num_tokens": 645207919.0, "step": 3381 }, { "epoch": 1.1542925413893155, "grad_norm": 0.24349604874149822, "learning_rate": 3.418057663125949e-05, "loss": 0.3043, "num_tokens": 645408514.0, "step": 3382 }, { "epoch": 1.1546338965693805, "grad_norm": 0.21838990290073768, "learning_rate": 3.4174253920080934e-05, "loss": 0.3111, "num_tokens": 645624121.0, "step": 3383 }, { "epoch": 1.1549752517494454, "grad_norm": 0.20855391193541353, "learning_rate": 3.416793120890238e-05, "loss": 0.2809, "num_tokens": 645834615.0, "step": 3384 }, { "epoch": 1.1553166069295102, "grad_norm": 0.21540092944950331, "learning_rate": 3.416160849772382e-05, "loss": 0.3229, "num_tokens": 646042036.0, "step": 3385 }, { "epoch": 1.155657962109575, "grad_norm": 0.25089238118040863, "learning_rate": 3.4155285786545275e-05, "loss": 0.3065, "num_tokens": 646233334.0, "step": 3386 }, { "epoch": 1.15599931728964, "grad_norm": 0.22704205327445745, "learning_rate": 3.414896307536672e-05, "loss": 0.3016, "num_tokens": 646410623.0, "step": 3387 }, { "epoch": 1.1563406724697047, "grad_norm": 0.23768343727723482, "learning_rate": 3.414264036418817e-05, "loss": 0.2771, "num_tokens": 646577360.0, "step": 3388 }, { "epoch": 1.1566820276497696, "grad_norm": 0.2374881673755105, "learning_rate": 3.4136317653009616e-05, "loss": 0.3233, "num_tokens": 646757047.0, "step": 3389 }, { "epoch": 1.1570233828298344, "grad_norm": 0.24493637927878117, "learning_rate": 3.4129994941831056e-05, "loss": 0.3312, "num_tokens": 646944377.0, "step": 3390 }, { "epoch": 1.1573647380098993, "grad_norm": 0.2598143589069903, "learning_rate": 3.41236722306525e-05, "loss": 0.3205, "num_tokens": 647119767.0, "step": 3391 }, { "epoch": 1.157706093189964, "grad_norm": 0.2406149219276119, "learning_rate": 3.411734951947395e-05, "loss": 0.305, "num_tokens": 647297765.0, "step": 3392 }, { "epoch": 1.158047448370029, "grad_norm": 0.24447550128397877, "learning_rate": 3.41110268082954e-05, "loss": 0.3149, "num_tokens": 647488031.0, "step": 3393 }, { "epoch": 1.158388803550094, "grad_norm": 0.23470766066653828, "learning_rate": 3.4104704097116844e-05, "loss": 0.2947, "num_tokens": 647688838.0, "step": 3394 }, { "epoch": 1.1587301587301586, "grad_norm": 0.21381410131281584, "learning_rate": 3.409838138593829e-05, "loss": 0.3141, "num_tokens": 647907482.0, "step": 3395 }, { "epoch": 1.1590715139102237, "grad_norm": 0.20204357257188, "learning_rate": 3.409205867475974e-05, "loss": 0.2949, "num_tokens": 648064626.0, "step": 3396 }, { "epoch": 1.1594128690902885, "grad_norm": 0.2959923854294666, "learning_rate": 3.4085735963581185e-05, "loss": 0.3133, "num_tokens": 648246478.0, "step": 3397 }, { "epoch": 1.1597542242703534, "grad_norm": 0.25652437744865003, "learning_rate": 3.407941325240263e-05, "loss": 0.3148, "num_tokens": 648451610.0, "step": 3398 }, { "epoch": 1.1600955794504182, "grad_norm": 0.20701456135408183, "learning_rate": 3.407309054122408e-05, "loss": 0.3145, "num_tokens": 648629355.0, "step": 3399 }, { "epoch": 1.160436934630483, "grad_norm": 0.23602280307704354, "learning_rate": 3.4066767830045526e-05, "loss": 0.3181, "num_tokens": 648807971.0, "step": 3400 }, { "epoch": 1.1607782898105479, "grad_norm": 0.2521618521101936, "learning_rate": 3.406044511886697e-05, "loss": 0.3014, "num_tokens": 648964162.0, "step": 3401 }, { "epoch": 1.1611196449906127, "grad_norm": 0.20994602720837438, "learning_rate": 3.405412240768841e-05, "loss": 0.3319, "num_tokens": 649183294.0, "step": 3402 }, { "epoch": 1.1614610001706775, "grad_norm": 0.2584988568732664, "learning_rate": 3.4047799696509867e-05, "loss": 0.31, "num_tokens": 649377280.0, "step": 3403 }, { "epoch": 1.1618023553507424, "grad_norm": 0.22374808286575942, "learning_rate": 3.4041476985331313e-05, "loss": 0.2998, "num_tokens": 649557533.0, "step": 3404 }, { "epoch": 1.1621437105308072, "grad_norm": 0.21853671169303535, "learning_rate": 3.403515427415276e-05, "loss": 0.3237, "num_tokens": 649767584.0, "step": 3405 }, { "epoch": 1.162485065710872, "grad_norm": 0.2541636407077378, "learning_rate": 3.402883156297421e-05, "loss": 0.32, "num_tokens": 649944610.0, "step": 3406 }, { "epoch": 1.1628264208909371, "grad_norm": 0.24799427972510704, "learning_rate": 3.4022508851795654e-05, "loss": 0.3103, "num_tokens": 650128779.0, "step": 3407 }, { "epoch": 1.163167776071002, "grad_norm": 0.21462684506646176, "learning_rate": 3.4016186140617095e-05, "loss": 0.2886, "num_tokens": 650285703.0, "step": 3408 }, { "epoch": 1.1635091312510668, "grad_norm": 0.22708595846560115, "learning_rate": 3.400986342943854e-05, "loss": 0.3192, "num_tokens": 650504460.0, "step": 3409 }, { "epoch": 1.1638504864311316, "grad_norm": 0.2255400225258728, "learning_rate": 3.400354071825999e-05, "loss": 0.3099, "num_tokens": 650669191.0, "step": 3410 }, { "epoch": 1.1641918416111965, "grad_norm": 0.23042216156252235, "learning_rate": 3.3997218007081436e-05, "loss": 0.2928, "num_tokens": 650846809.0, "step": 3411 }, { "epoch": 1.1645331967912613, "grad_norm": 0.2248783459923973, "learning_rate": 3.399089529590288e-05, "loss": 0.3148, "num_tokens": 651033166.0, "step": 3412 }, { "epoch": 1.1648745519713262, "grad_norm": 0.22613785808969716, "learning_rate": 3.3984572584724336e-05, "loss": 0.2971, "num_tokens": 651256973.0, "step": 3413 }, { "epoch": 1.165215907151391, "grad_norm": 0.2449577791610012, "learning_rate": 3.3978249873545777e-05, "loss": 0.3272, "num_tokens": 651412913.0, "step": 3414 }, { "epoch": 1.1655572623314558, "grad_norm": 0.23155109535573004, "learning_rate": 3.3971927162367224e-05, "loss": 0.3655, "num_tokens": 651624085.0, "step": 3415 }, { "epoch": 1.1658986175115207, "grad_norm": 0.2575257686273046, "learning_rate": 3.396560445118867e-05, "loss": 0.3075, "num_tokens": 651781897.0, "step": 3416 }, { "epoch": 1.1662399726915855, "grad_norm": 0.2420479153092766, "learning_rate": 3.395928174001012e-05, "loss": 0.3082, "num_tokens": 651986740.0, "step": 3417 }, { "epoch": 1.1665813278716504, "grad_norm": 0.22507413806689638, "learning_rate": 3.3952959028831564e-05, "loss": 0.2905, "num_tokens": 652129507.0, "step": 3418 }, { "epoch": 1.1669226830517152, "grad_norm": 0.2415548483879132, "learning_rate": 3.394663631765301e-05, "loss": 0.3199, "num_tokens": 652347229.0, "step": 3419 }, { "epoch": 1.1672640382317803, "grad_norm": 0.21570467520652756, "learning_rate": 3.394031360647446e-05, "loss": 0.3064, "num_tokens": 652509280.0, "step": 3420 }, { "epoch": 1.167605393411845, "grad_norm": 0.22878994591223017, "learning_rate": 3.3933990895295905e-05, "loss": 0.3167, "num_tokens": 652712790.0, "step": 3421 }, { "epoch": 1.16794674859191, "grad_norm": 0.23291174760594327, "learning_rate": 3.392766818411735e-05, "loss": 0.3037, "num_tokens": 652900066.0, "step": 3422 }, { "epoch": 1.1682881037719748, "grad_norm": 0.20908328919092176, "learning_rate": 3.39213454729388e-05, "loss": 0.3383, "num_tokens": 653123782.0, "step": 3423 }, { "epoch": 1.1686294589520396, "grad_norm": 0.2525579144338519, "learning_rate": 3.3915022761760246e-05, "loss": 0.3126, "num_tokens": 653317892.0, "step": 3424 }, { "epoch": 1.1689708141321045, "grad_norm": 0.21531844068637807, "learning_rate": 3.390870005058169e-05, "loss": 0.3145, "num_tokens": 653478272.0, "step": 3425 }, { "epoch": 1.1693121693121693, "grad_norm": 0.2570980053108043, "learning_rate": 3.3902377339403134e-05, "loss": 0.3576, "num_tokens": 653675038.0, "step": 3426 }, { "epoch": 1.1696535244922341, "grad_norm": 0.23410773342950683, "learning_rate": 3.389605462822458e-05, "loss": 0.3017, "num_tokens": 653852963.0, "step": 3427 }, { "epoch": 1.169994879672299, "grad_norm": 0.25637946594605704, "learning_rate": 3.388973191704603e-05, "loss": 0.3115, "num_tokens": 654066350.0, "step": 3428 }, { "epoch": 1.1703362348523638, "grad_norm": 0.205959866385753, "learning_rate": 3.388340920586748e-05, "loss": 0.3109, "num_tokens": 654265950.0, "step": 3429 }, { "epoch": 1.1706775900324287, "grad_norm": 0.22278012946875245, "learning_rate": 3.387708649468893e-05, "loss": 0.2946, "num_tokens": 654469626.0, "step": 3430 }, { "epoch": 1.1710189452124937, "grad_norm": 0.19910611646321852, "learning_rate": 3.3870763783510375e-05, "loss": 0.2967, "num_tokens": 654661231.0, "step": 3431 }, { "epoch": 1.1713603003925583, "grad_norm": 0.24772201648767433, "learning_rate": 3.3864441072331815e-05, "loss": 0.3067, "num_tokens": 654843906.0, "step": 3432 }, { "epoch": 1.1717016555726234, "grad_norm": 0.20579512529624922, "learning_rate": 3.385811836115326e-05, "loss": 0.3234, "num_tokens": 655057508.0, "step": 3433 }, { "epoch": 1.1720430107526882, "grad_norm": 0.2381457657934305, "learning_rate": 3.385179564997471e-05, "loss": 0.293, "num_tokens": 655222174.0, "step": 3434 }, { "epoch": 1.172384365932753, "grad_norm": 0.2395591137028401, "learning_rate": 3.3845472938796156e-05, "loss": 0.3045, "num_tokens": 655405054.0, "step": 3435 }, { "epoch": 1.172725721112818, "grad_norm": 0.21775474127769506, "learning_rate": 3.3839150227617603e-05, "loss": 0.3358, "num_tokens": 655595398.0, "step": 3436 }, { "epoch": 1.1730670762928828, "grad_norm": 0.23852426167643612, "learning_rate": 3.383282751643905e-05, "loss": 0.2826, "num_tokens": 655784257.0, "step": 3437 }, { "epoch": 1.1734084314729476, "grad_norm": 0.23216311135420603, "learning_rate": 3.38265048052605e-05, "loss": 0.3476, "num_tokens": 656000359.0, "step": 3438 }, { "epoch": 1.1737497866530124, "grad_norm": 0.2411130375630135, "learning_rate": 3.3820182094081944e-05, "loss": 0.3344, "num_tokens": 656204902.0, "step": 3439 }, { "epoch": 1.1740911418330773, "grad_norm": 0.2242689737154893, "learning_rate": 3.381385938290339e-05, "loss": 0.3156, "num_tokens": 656377772.0, "step": 3440 }, { "epoch": 1.1744324970131421, "grad_norm": 0.24211825831978656, "learning_rate": 3.380753667172484e-05, "loss": 0.3193, "num_tokens": 656569024.0, "step": 3441 }, { "epoch": 1.174773852193207, "grad_norm": 0.21189890425522834, "learning_rate": 3.3801213960546285e-05, "loss": 0.3071, "num_tokens": 656780163.0, "step": 3442 }, { "epoch": 1.1751152073732718, "grad_norm": 0.20101735434731277, "learning_rate": 3.379489124936773e-05, "loss": 0.3025, "num_tokens": 657014450.0, "step": 3443 }, { "epoch": 1.1754565625533369, "grad_norm": 0.23016211108667753, "learning_rate": 3.378856853818917e-05, "loss": 0.3304, "num_tokens": 657218724.0, "step": 3444 }, { "epoch": 1.1757979177334017, "grad_norm": 0.22319058361846814, "learning_rate": 3.378224582701062e-05, "loss": 0.2782, "num_tokens": 657394352.0, "step": 3445 }, { "epoch": 1.1761392729134665, "grad_norm": 0.22921618685991274, "learning_rate": 3.377592311583207e-05, "loss": 0.3363, "num_tokens": 657604578.0, "step": 3446 }, { "epoch": 1.1764806280935314, "grad_norm": 0.2511552325546683, "learning_rate": 3.376960040465352e-05, "loss": 0.3314, "num_tokens": 657777725.0, "step": 3447 }, { "epoch": 1.1768219832735962, "grad_norm": 0.2140250885575136, "learning_rate": 3.376327769347497e-05, "loss": 0.2878, "num_tokens": 657990914.0, "step": 3448 }, { "epoch": 1.177163338453661, "grad_norm": 0.22722644466476122, "learning_rate": 3.3756954982296414e-05, "loss": 0.3009, "num_tokens": 658205266.0, "step": 3449 }, { "epoch": 1.177504693633726, "grad_norm": 0.21525627900055072, "learning_rate": 3.3750632271117854e-05, "loss": 0.3192, "num_tokens": 658401490.0, "step": 3450 }, { "epoch": 1.1778460488137907, "grad_norm": 0.230851406384967, "learning_rate": 3.37443095599393e-05, "loss": 0.2747, "num_tokens": 658579083.0, "step": 3451 }, { "epoch": 1.1781874039938556, "grad_norm": 0.22773854636423252, "learning_rate": 3.373798684876075e-05, "loss": 0.2861, "num_tokens": 658747131.0, "step": 3452 }, { "epoch": 1.1785287591739204, "grad_norm": 0.22716729947467898, "learning_rate": 3.3731664137582195e-05, "loss": 0.3098, "num_tokens": 658928419.0, "step": 3453 }, { "epoch": 1.1788701143539853, "grad_norm": 0.22953128979376497, "learning_rate": 3.372534142640364e-05, "loss": 0.2979, "num_tokens": 659142624.0, "step": 3454 }, { "epoch": 1.17921146953405, "grad_norm": 0.20906506796940758, "learning_rate": 3.371901871522509e-05, "loss": 0.3124, "num_tokens": 659344691.0, "step": 3455 }, { "epoch": 1.179552824714115, "grad_norm": 0.2346918739316121, "learning_rate": 3.3712696004046536e-05, "loss": 0.2895, "num_tokens": 659529831.0, "step": 3456 }, { "epoch": 1.17989417989418, "grad_norm": 0.22619124811077554, "learning_rate": 3.370637329286798e-05, "loss": 0.3033, "num_tokens": 659711990.0, "step": 3457 }, { "epoch": 1.1802355350742448, "grad_norm": 0.2185897529188664, "learning_rate": 3.370005058168943e-05, "loss": 0.3082, "num_tokens": 659900231.0, "step": 3458 }, { "epoch": 1.1805768902543097, "grad_norm": 0.22750761119951493, "learning_rate": 3.369372787051088e-05, "loss": 0.33, "num_tokens": 660099568.0, "step": 3459 }, { "epoch": 1.1809182454343745, "grad_norm": 0.2294227430573283, "learning_rate": 3.3687405159332324e-05, "loss": 0.3081, "num_tokens": 660290443.0, "step": 3460 }, { "epoch": 1.1812596006144394, "grad_norm": 0.23330816222949338, "learning_rate": 3.368108244815377e-05, "loss": 0.3545, "num_tokens": 660515838.0, "step": 3461 }, { "epoch": 1.1816009557945042, "grad_norm": 0.22296801810005512, "learning_rate": 3.367475973697521e-05, "loss": 0.3126, "num_tokens": 660714070.0, "step": 3462 }, { "epoch": 1.181942310974569, "grad_norm": 0.2150206594376217, "learning_rate": 3.3668437025796665e-05, "loss": 0.2882, "num_tokens": 660882231.0, "step": 3463 }, { "epoch": 1.1822836661546339, "grad_norm": 0.22484855301174902, "learning_rate": 3.366211431461811e-05, "loss": 0.3204, "num_tokens": 661103341.0, "step": 3464 }, { "epoch": 1.1826250213346987, "grad_norm": 0.22405860672028188, "learning_rate": 3.365579160343956e-05, "loss": 0.3189, "num_tokens": 661298878.0, "step": 3465 }, { "epoch": 1.1829663765147636, "grad_norm": 0.22102496958266407, "learning_rate": 3.3649468892261006e-05, "loss": 0.285, "num_tokens": 661484655.0, "step": 3466 }, { "epoch": 1.1833077316948284, "grad_norm": 0.252888989876531, "learning_rate": 3.364314618108245e-05, "loss": 0.3256, "num_tokens": 661670501.0, "step": 3467 }, { "epoch": 1.1836490868748932, "grad_norm": 0.2369689451384865, "learning_rate": 3.363682346990389e-05, "loss": 0.3171, "num_tokens": 661848465.0, "step": 3468 }, { "epoch": 1.183990442054958, "grad_norm": 0.24349044468914322, "learning_rate": 3.363050075872534e-05, "loss": 0.3127, "num_tokens": 662009618.0, "step": 3469 }, { "epoch": 1.1843317972350231, "grad_norm": 0.26471722123657737, "learning_rate": 3.362417804754679e-05, "loss": 0.3278, "num_tokens": 662237001.0, "step": 3470 }, { "epoch": 1.184673152415088, "grad_norm": 0.20556844697761512, "learning_rate": 3.3617855336368234e-05, "loss": 0.2872, "num_tokens": 662418983.0, "step": 3471 }, { "epoch": 1.1850145075951528, "grad_norm": 0.23006149215991706, "learning_rate": 3.361153262518968e-05, "loss": 0.2959, "num_tokens": 662596139.0, "step": 3472 }, { "epoch": 1.1853558627752177, "grad_norm": 0.27240359281485793, "learning_rate": 3.3605209914011135e-05, "loss": 0.296, "num_tokens": 662774689.0, "step": 3473 }, { "epoch": 1.1856972179552825, "grad_norm": 0.21213592022644465, "learning_rate": 3.3598887202832575e-05, "loss": 0.2976, "num_tokens": 662957820.0, "step": 3474 }, { "epoch": 1.1860385731353473, "grad_norm": 0.25510775789497986, "learning_rate": 3.359256449165402e-05, "loss": 0.3183, "num_tokens": 663119465.0, "step": 3475 }, { "epoch": 1.1863799283154122, "grad_norm": 0.26830655286509264, "learning_rate": 3.358624178047547e-05, "loss": 0.2979, "num_tokens": 663299937.0, "step": 3476 }, { "epoch": 1.186721283495477, "grad_norm": 0.22222462315767594, "learning_rate": 3.3579919069296916e-05, "loss": 0.3203, "num_tokens": 663491442.0, "step": 3477 }, { "epoch": 1.1870626386755418, "grad_norm": 0.22434980699696358, "learning_rate": 3.357359635811836e-05, "loss": 0.3188, "num_tokens": 663690160.0, "step": 3478 }, { "epoch": 1.1874039938556067, "grad_norm": 0.21437862682726458, "learning_rate": 3.356727364693981e-05, "loss": 0.2893, "num_tokens": 663920462.0, "step": 3479 }, { "epoch": 1.1877453490356715, "grad_norm": 0.22085669759568521, "learning_rate": 3.356095093576126e-05, "loss": 0.323, "num_tokens": 664117986.0, "step": 3480 }, { "epoch": 1.1880867042157366, "grad_norm": 0.2232049464705644, "learning_rate": 3.3554628224582704e-05, "loss": 0.2784, "num_tokens": 664319808.0, "step": 3481 }, { "epoch": 1.1884280593958014, "grad_norm": 0.2010596124456435, "learning_rate": 3.354830551340415e-05, "loss": 0.3575, "num_tokens": 664561504.0, "step": 3482 }, { "epoch": 1.1887694145758663, "grad_norm": 0.25086054793695056, "learning_rate": 3.35419828022256e-05, "loss": 0.3082, "num_tokens": 664790301.0, "step": 3483 }, { "epoch": 1.189110769755931, "grad_norm": 0.22214774987019725, "learning_rate": 3.3535660091047045e-05, "loss": 0.3021, "num_tokens": 664974308.0, "step": 3484 }, { "epoch": 1.189452124935996, "grad_norm": 0.24468797577293655, "learning_rate": 3.352933737986849e-05, "loss": 0.3019, "num_tokens": 665136066.0, "step": 3485 }, { "epoch": 1.1897934801160608, "grad_norm": 0.23434937491284824, "learning_rate": 3.352301466868993e-05, "loss": 0.326, "num_tokens": 665327584.0, "step": 3486 }, { "epoch": 1.1901348352961256, "grad_norm": 1.0673358917355487, "learning_rate": 3.351669195751138e-05, "loss": 0.3571, "num_tokens": 665506548.0, "step": 3487 }, { "epoch": 1.1904761904761905, "grad_norm": 0.24896177284395568, "learning_rate": 3.3510369246332826e-05, "loss": 0.3135, "num_tokens": 665686442.0, "step": 3488 }, { "epoch": 1.1908175456562553, "grad_norm": 0.43478310908448986, "learning_rate": 3.350404653515427e-05, "loss": 0.33, "num_tokens": 665854884.0, "step": 3489 }, { "epoch": 1.1911589008363201, "grad_norm": 0.22008760655877752, "learning_rate": 3.349772382397573e-05, "loss": 0.3118, "num_tokens": 666031977.0, "step": 3490 }, { "epoch": 1.191500256016385, "grad_norm": 0.23830075579824023, "learning_rate": 3.3491401112797174e-05, "loss": 0.3011, "num_tokens": 666212820.0, "step": 3491 }, { "epoch": 1.1918416111964498, "grad_norm": 0.23624252981622937, "learning_rate": 3.3485078401618614e-05, "loss": 0.2917, "num_tokens": 666414011.0, "step": 3492 }, { "epoch": 1.1921829663765147, "grad_norm": 0.22077195032169006, "learning_rate": 3.347875569044006e-05, "loss": 0.2827, "num_tokens": 666566317.0, "step": 3493 }, { "epoch": 1.1925243215565797, "grad_norm": 0.25855823120632404, "learning_rate": 3.347243297926151e-05, "loss": 0.3195, "num_tokens": 666761450.0, "step": 3494 }, { "epoch": 1.1928656767366446, "grad_norm": 0.2256983325103009, "learning_rate": 3.3466110268082955e-05, "loss": 0.2985, "num_tokens": 666968363.0, "step": 3495 }, { "epoch": 1.1932070319167094, "grad_norm": 0.23051948604931666, "learning_rate": 3.34597875569044e-05, "loss": 0.3336, "num_tokens": 667156419.0, "step": 3496 }, { "epoch": 1.1935483870967742, "grad_norm": 0.21381578694704564, "learning_rate": 3.345346484572585e-05, "loss": 0.2919, "num_tokens": 667361917.0, "step": 3497 }, { "epoch": 1.193889742276839, "grad_norm": 0.23088666867020985, "learning_rate": 3.3447142134547296e-05, "loss": 0.3266, "num_tokens": 667519441.0, "step": 3498 }, { "epoch": 1.194231097456904, "grad_norm": 0.23678898637656728, "learning_rate": 3.344081942336874e-05, "loss": 0.297, "num_tokens": 667711962.0, "step": 3499 }, { "epoch": 1.1945724526369688, "grad_norm": 0.2509785372608103, "learning_rate": 3.343449671219019e-05, "loss": 0.3205, "num_tokens": 667896760.0, "step": 3500 }, { "epoch": 1.1949138078170336, "grad_norm": 0.23001102730151415, "learning_rate": 3.342817400101164e-05, "loss": 0.2932, "num_tokens": 668094231.0, "step": 3501 }, { "epoch": 1.1952551629970984, "grad_norm": 0.24840775271497023, "learning_rate": 3.3421851289833084e-05, "loss": 0.2796, "num_tokens": 668270213.0, "step": 3502 }, { "epoch": 1.1955965181771633, "grad_norm": 0.37741616534377187, "learning_rate": 3.341552857865453e-05, "loss": 0.2967, "num_tokens": 668477332.0, "step": 3503 }, { "epoch": 1.1959378733572281, "grad_norm": 0.22870557922980456, "learning_rate": 3.340920586747597e-05, "loss": 0.3097, "num_tokens": 668675020.0, "step": 3504 }, { "epoch": 1.196279228537293, "grad_norm": 0.23520251455769495, "learning_rate": 3.340288315629742e-05, "loss": 0.334, "num_tokens": 668857813.0, "step": 3505 }, { "epoch": 1.1966205837173578, "grad_norm": 0.24826228634284964, "learning_rate": 3.339656044511887e-05, "loss": 0.3457, "num_tokens": 669119462.0, "step": 3506 }, { "epoch": 1.1969619388974229, "grad_norm": 0.2589779090129509, "learning_rate": 3.339023773394032e-05, "loss": 0.3536, "num_tokens": 669356459.0, "step": 3507 }, { "epoch": 1.1973032940774877, "grad_norm": 0.22106653970829235, "learning_rate": 3.3383915022761766e-05, "loss": 0.3199, "num_tokens": 669511823.0, "step": 3508 }, { "epoch": 1.1976446492575525, "grad_norm": 0.24344470622678926, "learning_rate": 3.337759231158321e-05, "loss": 0.2967, "num_tokens": 669686927.0, "step": 3509 }, { "epoch": 1.1979860044376174, "grad_norm": 0.2226360511078573, "learning_rate": 3.337126960040465e-05, "loss": 0.3235, "num_tokens": 669877795.0, "step": 3510 }, { "epoch": 1.1983273596176822, "grad_norm": 0.24272102699392165, "learning_rate": 3.33649468892261e-05, "loss": 0.3033, "num_tokens": 670032489.0, "step": 3511 }, { "epoch": 1.198668714797747, "grad_norm": 0.1949794576471132, "learning_rate": 3.335862417804755e-05, "loss": 0.3113, "num_tokens": 670224400.0, "step": 3512 }, { "epoch": 1.199010069977812, "grad_norm": 0.2482124244069189, "learning_rate": 3.3352301466868994e-05, "loss": 0.2983, "num_tokens": 670398184.0, "step": 3513 }, { "epoch": 1.1993514251578767, "grad_norm": 0.21992680939908696, "learning_rate": 3.334597875569044e-05, "loss": 0.3038, "num_tokens": 670629233.0, "step": 3514 }, { "epoch": 1.1996927803379416, "grad_norm": 0.2061674818390431, "learning_rate": 3.333965604451189e-05, "loss": 0.2899, "num_tokens": 670799880.0, "step": 3515 }, { "epoch": 1.2000341355180064, "grad_norm": 0.24203722541411962, "learning_rate": 3.3333333333333335e-05, "loss": 0.3166, "num_tokens": 671000860.0, "step": 3516 }, { "epoch": 1.2003754906980713, "grad_norm": 0.2137428766006786, "learning_rate": 3.332701062215478e-05, "loss": 0.3134, "num_tokens": 671219866.0, "step": 3517 }, { "epoch": 1.2007168458781363, "grad_norm": 0.24290340008390876, "learning_rate": 3.332068791097623e-05, "loss": 0.3157, "num_tokens": 671438262.0, "step": 3518 }, { "epoch": 1.201058201058201, "grad_norm": 0.20136544953539462, "learning_rate": 3.3314365199797676e-05, "loss": 0.2851, "num_tokens": 671634404.0, "step": 3519 }, { "epoch": 1.201399556238266, "grad_norm": 0.21401979297219903, "learning_rate": 3.330804248861912e-05, "loss": 0.3316, "num_tokens": 671810331.0, "step": 3520 }, { "epoch": 1.2017409114183308, "grad_norm": 0.24706045637293975, "learning_rate": 3.330171977744056e-05, "loss": 0.3254, "num_tokens": 672016986.0, "step": 3521 }, { "epoch": 1.2020822665983957, "grad_norm": 0.21971451512460463, "learning_rate": 3.329539706626201e-05, "loss": 0.2923, "num_tokens": 672220361.0, "step": 3522 }, { "epoch": 1.2024236217784605, "grad_norm": 0.1953344288594552, "learning_rate": 3.3289074355083464e-05, "loss": 0.321, "num_tokens": 672464038.0, "step": 3523 }, { "epoch": 1.2027649769585254, "grad_norm": 0.21405174489225795, "learning_rate": 3.328275164390491e-05, "loss": 0.3027, "num_tokens": 672633062.0, "step": 3524 }, { "epoch": 1.2031063321385902, "grad_norm": 0.24616592971624957, "learning_rate": 3.327642893272636e-05, "loss": 0.3197, "num_tokens": 672824246.0, "step": 3525 }, { "epoch": 1.203447687318655, "grad_norm": 0.24609192032190166, "learning_rate": 3.3270106221547805e-05, "loss": 0.327, "num_tokens": 673003202.0, "step": 3526 }, { "epoch": 1.2037890424987199, "grad_norm": 0.24906369556541416, "learning_rate": 3.3263783510369245e-05, "loss": 0.2962, "num_tokens": 673169654.0, "step": 3527 }, { "epoch": 1.2041303976787847, "grad_norm": 0.23164607738905157, "learning_rate": 3.325746079919069e-05, "loss": 0.3081, "num_tokens": 673346832.0, "step": 3528 }, { "epoch": 1.2044717528588496, "grad_norm": 0.24862146766526083, "learning_rate": 3.325113808801214e-05, "loss": 0.3208, "num_tokens": 673489979.0, "step": 3529 }, { "epoch": 1.2048131080389144, "grad_norm": 0.25497700327854206, "learning_rate": 3.3244815376833586e-05, "loss": 0.2824, "num_tokens": 673686353.0, "step": 3530 }, { "epoch": 1.2051544632189795, "grad_norm": 0.19849409122290643, "learning_rate": 3.323849266565503e-05, "loss": 0.3012, "num_tokens": 673864378.0, "step": 3531 }, { "epoch": 1.2054958183990443, "grad_norm": 0.24011990704097255, "learning_rate": 3.323216995447648e-05, "loss": 0.2867, "num_tokens": 674068700.0, "step": 3532 }, { "epoch": 1.2058371735791091, "grad_norm": 0.19631571455699703, "learning_rate": 3.322584724329793e-05, "loss": 0.2878, "num_tokens": 674259066.0, "step": 3533 }, { "epoch": 1.206178528759174, "grad_norm": 0.2029292142135432, "learning_rate": 3.3219524532119374e-05, "loss": 0.3272, "num_tokens": 674480023.0, "step": 3534 }, { "epoch": 1.2065198839392388, "grad_norm": 0.24703324972961163, "learning_rate": 3.321320182094082e-05, "loss": 0.302, "num_tokens": 674664576.0, "step": 3535 }, { "epoch": 1.2068612391193037, "grad_norm": 0.2203292375959898, "learning_rate": 3.320687910976227e-05, "loss": 0.3496, "num_tokens": 674873544.0, "step": 3536 }, { "epoch": 1.2072025942993685, "grad_norm": 0.40471169728376527, "learning_rate": 3.3200556398583715e-05, "loss": 0.2957, "num_tokens": 675030172.0, "step": 3537 }, { "epoch": 1.2075439494794333, "grad_norm": 0.17469875947715469, "learning_rate": 3.319423368740516e-05, "loss": 0.2967, "num_tokens": 675238761.0, "step": 3538 }, { "epoch": 1.2078853046594982, "grad_norm": 0.24223904139377433, "learning_rate": 3.31879109762266e-05, "loss": 0.3079, "num_tokens": 675415393.0, "step": 3539 }, { "epoch": 1.208226659839563, "grad_norm": 0.21812209994357962, "learning_rate": 3.3181588265048056e-05, "loss": 0.3268, "num_tokens": 675598512.0, "step": 3540 }, { "epoch": 1.2085680150196279, "grad_norm": 0.2802020456750486, "learning_rate": 3.31752655538695e-05, "loss": 0.2839, "num_tokens": 675738976.0, "step": 3541 }, { "epoch": 1.2089093701996927, "grad_norm": 0.22821356359716757, "learning_rate": 3.316894284269095e-05, "loss": 0.3715, "num_tokens": 675957681.0, "step": 3542 }, { "epoch": 1.2092507253797575, "grad_norm": 0.2373998330482573, "learning_rate": 3.31626201315124e-05, "loss": 0.317, "num_tokens": 676132230.0, "step": 3543 }, { "epoch": 1.2095920805598226, "grad_norm": 0.23988091565418232, "learning_rate": 3.3156297420333844e-05, "loss": 0.3034, "num_tokens": 676299369.0, "step": 3544 }, { "epoch": 1.2099334357398874, "grad_norm": 0.2362920234254385, "learning_rate": 3.3149974709155284e-05, "loss": 0.3229, "num_tokens": 676492025.0, "step": 3545 }, { "epoch": 1.2102747909199523, "grad_norm": 0.21820551113719378, "learning_rate": 3.314365199797673e-05, "loss": 0.3001, "num_tokens": 676659694.0, "step": 3546 }, { "epoch": 1.210616146100017, "grad_norm": 0.2772281598283193, "learning_rate": 3.313732928679818e-05, "loss": 0.3102, "num_tokens": 676837129.0, "step": 3547 }, { "epoch": 1.210957501280082, "grad_norm": 0.22101024847047265, "learning_rate": 3.3131006575619625e-05, "loss": 0.3106, "num_tokens": 677004351.0, "step": 3548 }, { "epoch": 1.2112988564601468, "grad_norm": 0.24099900065788357, "learning_rate": 3.312468386444107e-05, "loss": 0.3023, "num_tokens": 677193247.0, "step": 3549 }, { "epoch": 1.2116402116402116, "grad_norm": 0.2294032732316437, "learning_rate": 3.3118361153262526e-05, "loss": 0.3091, "num_tokens": 677360050.0, "step": 3550 }, { "epoch": 1.2119815668202765, "grad_norm": 0.21135432112166444, "learning_rate": 3.3112038442083966e-05, "loss": 0.3139, "num_tokens": 677578497.0, "step": 3551 }, { "epoch": 1.2123229220003413, "grad_norm": 0.23933825798051148, "learning_rate": 3.310571573090541e-05, "loss": 0.3309, "num_tokens": 677767013.0, "step": 3552 }, { "epoch": 1.2126642771804061, "grad_norm": 0.2377532095482337, "learning_rate": 3.309939301972686e-05, "loss": 0.333, "num_tokens": 677983536.0, "step": 3553 }, { "epoch": 1.213005632360471, "grad_norm": 0.23924240795402568, "learning_rate": 3.309307030854831e-05, "loss": 0.2901, "num_tokens": 678155823.0, "step": 3554 }, { "epoch": 1.213346987540536, "grad_norm": 0.2627165226539522, "learning_rate": 3.3086747597369754e-05, "loss": 0.3282, "num_tokens": 678351403.0, "step": 3555 }, { "epoch": 1.2136883427206007, "grad_norm": 0.24369399660083885, "learning_rate": 3.30804248861912e-05, "loss": 0.3212, "num_tokens": 678518203.0, "step": 3556 }, { "epoch": 1.2140296979006657, "grad_norm": 0.22425372107820074, "learning_rate": 3.307410217501265e-05, "loss": 0.3087, "num_tokens": 678696721.0, "step": 3557 }, { "epoch": 1.2143710530807306, "grad_norm": 0.22103098351063263, "learning_rate": 3.3067779463834095e-05, "loss": 0.2714, "num_tokens": 678896318.0, "step": 3558 }, { "epoch": 1.2147124082607954, "grad_norm": 0.25474215014297913, "learning_rate": 3.306145675265554e-05, "loss": 0.3221, "num_tokens": 679063341.0, "step": 3559 }, { "epoch": 1.2150537634408602, "grad_norm": 0.24406854572919146, "learning_rate": 3.305513404147699e-05, "loss": 0.3126, "num_tokens": 679239257.0, "step": 3560 }, { "epoch": 1.215395118620925, "grad_norm": 0.22702569923619173, "learning_rate": 3.3048811330298436e-05, "loss": 0.2889, "num_tokens": 679393263.0, "step": 3561 }, { "epoch": 1.21573647380099, "grad_norm": 0.2609160989210133, "learning_rate": 3.304248861911988e-05, "loss": 0.316, "num_tokens": 679580268.0, "step": 3562 }, { "epoch": 1.2160778289810548, "grad_norm": 0.26195137922350076, "learning_rate": 3.303616590794132e-05, "loss": 0.3366, "num_tokens": 679793992.0, "step": 3563 }, { "epoch": 1.2164191841611196, "grad_norm": 0.2153182460613605, "learning_rate": 3.302984319676277e-05, "loss": 0.293, "num_tokens": 680024664.0, "step": 3564 }, { "epoch": 1.2167605393411844, "grad_norm": 0.21244696633652568, "learning_rate": 3.302352048558422e-05, "loss": 0.3136, "num_tokens": 680246671.0, "step": 3565 }, { "epoch": 1.2171018945212493, "grad_norm": 0.23123358883931158, "learning_rate": 3.3017197774405664e-05, "loss": 0.3227, "num_tokens": 680448764.0, "step": 3566 }, { "epoch": 1.2174432497013141, "grad_norm": 0.25031529726412044, "learning_rate": 3.301087506322712e-05, "loss": 0.298, "num_tokens": 680601664.0, "step": 3567 }, { "epoch": 1.2177846048813792, "grad_norm": 0.21092677621872433, "learning_rate": 3.3004552352048564e-05, "loss": 0.3058, "num_tokens": 680788124.0, "step": 3568 }, { "epoch": 1.218125960061444, "grad_norm": 0.24478112246193395, "learning_rate": 3.2998229640870005e-05, "loss": 0.3509, "num_tokens": 681005981.0, "step": 3569 }, { "epoch": 1.2184673152415089, "grad_norm": 0.20526674576971327, "learning_rate": 3.299190692969145e-05, "loss": 0.2958, "num_tokens": 681229599.0, "step": 3570 }, { "epoch": 1.2188086704215737, "grad_norm": 0.231886243166327, "learning_rate": 3.29855842185129e-05, "loss": 0.2853, "num_tokens": 681391623.0, "step": 3571 }, { "epoch": 1.2191500256016385, "grad_norm": 0.2391755369059274, "learning_rate": 3.2979261507334346e-05, "loss": 0.3078, "num_tokens": 681611554.0, "step": 3572 }, { "epoch": 1.2194913807817034, "grad_norm": 0.21924237218202405, "learning_rate": 3.297293879615579e-05, "loss": 0.2876, "num_tokens": 681797580.0, "step": 3573 }, { "epoch": 1.2198327359617682, "grad_norm": 0.235377286428083, "learning_rate": 3.296661608497724e-05, "loss": 0.3236, "num_tokens": 681982293.0, "step": 3574 }, { "epoch": 1.220174091141833, "grad_norm": 0.23672395794506756, "learning_rate": 3.2960293373798687e-05, "loss": 0.2901, "num_tokens": 682140584.0, "step": 3575 }, { "epoch": 1.220515446321898, "grad_norm": 0.2133507735831047, "learning_rate": 3.2953970662620134e-05, "loss": 0.298, "num_tokens": 682318922.0, "step": 3576 }, { "epoch": 1.2208568015019627, "grad_norm": 0.2496243897405872, "learning_rate": 3.294764795144158e-05, "loss": 0.338, "num_tokens": 682472808.0, "step": 3577 }, { "epoch": 1.2211981566820276, "grad_norm": 3.3993835227867417, "learning_rate": 3.294132524026303e-05, "loss": 0.3235, "num_tokens": 682694984.0, "step": 3578 }, { "epoch": 1.2215395118620924, "grad_norm": 0.22565961091062514, "learning_rate": 3.2935002529084475e-05, "loss": 0.3036, "num_tokens": 682914788.0, "step": 3579 }, { "epoch": 1.2218808670421573, "grad_norm": 0.21041762247674592, "learning_rate": 3.292867981790592e-05, "loss": 0.3189, "num_tokens": 683084445.0, "step": 3580 }, { "epoch": 1.2222222222222223, "grad_norm": 0.24900559102532838, "learning_rate": 3.292235710672736e-05, "loss": 0.3081, "num_tokens": 683257070.0, "step": 3581 }, { "epoch": 1.2225635774022872, "grad_norm": 0.24164305516287216, "learning_rate": 3.291603439554881e-05, "loss": 0.3326, "num_tokens": 683433152.0, "step": 3582 }, { "epoch": 1.222904932582352, "grad_norm": 0.24431775737082595, "learning_rate": 3.290971168437026e-05, "loss": 0.3189, "num_tokens": 683637219.0, "step": 3583 }, { "epoch": 1.2232462877624168, "grad_norm": 0.2915846962988202, "learning_rate": 3.290338897319171e-05, "loss": 0.2946, "num_tokens": 683826875.0, "step": 3584 }, { "epoch": 1.2235876429424817, "grad_norm": 0.22404832747615464, "learning_rate": 3.2897066262013156e-05, "loss": 0.3267, "num_tokens": 684037062.0, "step": 3585 }, { "epoch": 1.2239289981225465, "grad_norm": 0.21646411287147244, "learning_rate": 3.2890743550834603e-05, "loss": 0.3152, "num_tokens": 684236597.0, "step": 3586 }, { "epoch": 1.2242703533026114, "grad_norm": 0.21594359236862123, "learning_rate": 3.2884420839656044e-05, "loss": 0.3145, "num_tokens": 684425870.0, "step": 3587 }, { "epoch": 1.2246117084826762, "grad_norm": 0.2283878136021961, "learning_rate": 3.287809812847749e-05, "loss": 0.3079, "num_tokens": 684619555.0, "step": 3588 }, { "epoch": 1.224953063662741, "grad_norm": 0.222550895606355, "learning_rate": 3.287177541729894e-05, "loss": 0.2953, "num_tokens": 684793326.0, "step": 3589 }, { "epoch": 1.2252944188428059, "grad_norm": 0.2518712848597638, "learning_rate": 3.2865452706120385e-05, "loss": 0.3104, "num_tokens": 684929126.0, "step": 3590 }, { "epoch": 1.2256357740228707, "grad_norm": 0.25853384085438774, "learning_rate": 3.285912999494183e-05, "loss": 0.3154, "num_tokens": 685146155.0, "step": 3591 }, { "epoch": 1.2259771292029358, "grad_norm": 0.22638260095801224, "learning_rate": 3.285280728376328e-05, "loss": 0.3069, "num_tokens": 685376805.0, "step": 3592 }, { "epoch": 1.2263184843830004, "grad_norm": 0.23570018960650457, "learning_rate": 3.2846484572584726e-05, "loss": 0.2956, "num_tokens": 685555971.0, "step": 3593 }, { "epoch": 1.2266598395630655, "grad_norm": 0.23572008321845583, "learning_rate": 3.284016186140617e-05, "loss": 0.3435, "num_tokens": 685788110.0, "step": 3594 }, { "epoch": 1.2270011947431303, "grad_norm": 0.23290237331147867, "learning_rate": 3.283383915022762e-05, "loss": 0.2849, "num_tokens": 685980170.0, "step": 3595 }, { "epoch": 1.2273425499231951, "grad_norm": 0.24828001758678853, "learning_rate": 3.2827516439049066e-05, "loss": 0.3087, "num_tokens": 686195785.0, "step": 3596 }, { "epoch": 1.22768390510326, "grad_norm": 0.21684354365732122, "learning_rate": 3.2821193727870513e-05, "loss": 0.2975, "num_tokens": 686374921.0, "step": 3597 }, { "epoch": 1.2280252602833248, "grad_norm": 0.21623662732114388, "learning_rate": 3.281487101669196e-05, "loss": 0.2968, "num_tokens": 686562005.0, "step": 3598 }, { "epoch": 1.2283666154633897, "grad_norm": 0.2626980858618255, "learning_rate": 3.28085483055134e-05, "loss": 0.3278, "num_tokens": 686765713.0, "step": 3599 }, { "epoch": 1.2287079706434545, "grad_norm": 0.22187874716964104, "learning_rate": 3.2802225594334854e-05, "loss": 0.3115, "num_tokens": 686951242.0, "step": 3600 }, { "epoch": 1.2290493258235193, "grad_norm": 0.23452396234861764, "learning_rate": 3.27959028831563e-05, "loss": 0.2786, "num_tokens": 687123528.0, "step": 3601 }, { "epoch": 1.2293906810035842, "grad_norm": 0.226461984370565, "learning_rate": 3.278958017197775e-05, "loss": 0.3148, "num_tokens": 687279102.0, "step": 3602 }, { "epoch": 1.229732036183649, "grad_norm": 0.229379094510345, "learning_rate": 3.2783257460799195e-05, "loss": 0.3131, "num_tokens": 687511459.0, "step": 3603 }, { "epoch": 1.2300733913637139, "grad_norm": 0.21329892490097638, "learning_rate": 3.277693474962064e-05, "loss": 0.2891, "num_tokens": 687701541.0, "step": 3604 }, { "epoch": 1.230414746543779, "grad_norm": 0.2631068218739915, "learning_rate": 3.277061203844208e-05, "loss": 0.298, "num_tokens": 687865974.0, "step": 3605 }, { "epoch": 1.2307561017238438, "grad_norm": 0.2705549392250849, "learning_rate": 3.276428932726353e-05, "loss": 0.3313, "num_tokens": 688039952.0, "step": 3606 }, { "epoch": 1.2310974569039086, "grad_norm": 0.24780366694189537, "learning_rate": 3.2757966616084977e-05, "loss": 0.3024, "num_tokens": 688210513.0, "step": 3607 }, { "epoch": 1.2314388120839734, "grad_norm": 0.23008429479381948, "learning_rate": 3.2751643904906423e-05, "loss": 0.3177, "num_tokens": 688403442.0, "step": 3608 }, { "epoch": 1.2317801672640383, "grad_norm": 0.21844343538814345, "learning_rate": 3.274532119372787e-05, "loss": 0.2999, "num_tokens": 688582104.0, "step": 3609 }, { "epoch": 1.2321215224441031, "grad_norm": 0.233301879976958, "learning_rate": 3.2738998482549324e-05, "loss": 0.3221, "num_tokens": 688749640.0, "step": 3610 }, { "epoch": 1.232462877624168, "grad_norm": 0.2121719827347105, "learning_rate": 3.2732675771370764e-05, "loss": 0.3207, "num_tokens": 688977601.0, "step": 3611 }, { "epoch": 1.2328042328042328, "grad_norm": 0.2218707164333488, "learning_rate": 3.272635306019221e-05, "loss": 0.3262, "num_tokens": 689141122.0, "step": 3612 }, { "epoch": 1.2331455879842976, "grad_norm": 0.23687367957156055, "learning_rate": 3.272003034901366e-05, "loss": 0.329, "num_tokens": 689343349.0, "step": 3613 }, { "epoch": 1.2334869431643625, "grad_norm": 0.2289765761815702, "learning_rate": 3.2713707637835105e-05, "loss": 0.3055, "num_tokens": 689507425.0, "step": 3614 }, { "epoch": 1.2338282983444273, "grad_norm": 0.2347871218339146, "learning_rate": 3.270738492665655e-05, "loss": 0.3132, "num_tokens": 689716663.0, "step": 3615 }, { "epoch": 1.2341696535244921, "grad_norm": 0.2048999080377295, "learning_rate": 3.2701062215478e-05, "loss": 0.3281, "num_tokens": 689916678.0, "step": 3616 }, { "epoch": 1.234511008704557, "grad_norm": 0.2332426973466352, "learning_rate": 3.2694739504299446e-05, "loss": 0.2782, "num_tokens": 690061101.0, "step": 3617 }, { "epoch": 1.234852363884622, "grad_norm": 0.22575198613710676, "learning_rate": 3.268841679312089e-05, "loss": 0.3218, "num_tokens": 690258640.0, "step": 3618 }, { "epoch": 1.235193719064687, "grad_norm": 0.2165369374600666, "learning_rate": 3.268209408194234e-05, "loss": 0.3247, "num_tokens": 690448573.0, "step": 3619 }, { "epoch": 1.2355350742447517, "grad_norm": 0.24824230617540063, "learning_rate": 3.267577137076379e-05, "loss": 0.3104, "num_tokens": 690662810.0, "step": 3620 }, { "epoch": 1.2358764294248166, "grad_norm": 0.22342123250669005, "learning_rate": 3.2669448659585234e-05, "loss": 0.3126, "num_tokens": 690883165.0, "step": 3621 }, { "epoch": 1.2362177846048814, "grad_norm": 0.26914817120762174, "learning_rate": 3.266312594840668e-05, "loss": 0.3303, "num_tokens": 691043888.0, "step": 3622 }, { "epoch": 1.2365591397849462, "grad_norm": 0.22964527283835506, "learning_rate": 3.265680323722812e-05, "loss": 0.3118, "num_tokens": 691230885.0, "step": 3623 }, { "epoch": 1.236900494965011, "grad_norm": 0.2549837501914844, "learning_rate": 3.265048052604957e-05, "loss": 0.2934, "num_tokens": 691368615.0, "step": 3624 }, { "epoch": 1.237241850145076, "grad_norm": 0.252627532721102, "learning_rate": 3.2644157814871015e-05, "loss": 0.3301, "num_tokens": 691528103.0, "step": 3625 }, { "epoch": 1.2375832053251408, "grad_norm": 0.22253299039239136, "learning_rate": 3.263783510369246e-05, "loss": 0.3054, "num_tokens": 691728464.0, "step": 3626 }, { "epoch": 1.2379245605052056, "grad_norm": 0.21338623737468643, "learning_rate": 3.2631512392513916e-05, "loss": 0.3069, "num_tokens": 691948337.0, "step": 3627 }, { "epoch": 1.2382659156852704, "grad_norm": 0.20232487628443788, "learning_rate": 3.262518968133536e-05, "loss": 0.3198, "num_tokens": 692182597.0, "step": 3628 }, { "epoch": 1.2386072708653353, "grad_norm": 0.21059116391178184, "learning_rate": 3.26188669701568e-05, "loss": 0.2909, "num_tokens": 692414271.0, "step": 3629 }, { "epoch": 1.2389486260454001, "grad_norm": 0.25629248657621834, "learning_rate": 3.261254425897825e-05, "loss": 0.3076, "num_tokens": 692603221.0, "step": 3630 }, { "epoch": 1.2392899812254652, "grad_norm": 0.20972883419735394, "learning_rate": 3.26062215477997e-05, "loss": 0.3357, "num_tokens": 692853661.0, "step": 3631 }, { "epoch": 1.23963133640553, "grad_norm": 0.19760634541994307, "learning_rate": 3.2599898836621144e-05, "loss": 0.3186, "num_tokens": 693051536.0, "step": 3632 }, { "epoch": 1.2399726915855949, "grad_norm": 0.20128264776067026, "learning_rate": 3.259357612544259e-05, "loss": 0.2853, "num_tokens": 693247645.0, "step": 3633 }, { "epoch": 1.2403140467656597, "grad_norm": 0.2538404201248304, "learning_rate": 3.258725341426404e-05, "loss": 0.3139, "num_tokens": 693419848.0, "step": 3634 }, { "epoch": 1.2406554019457245, "grad_norm": 0.20484302601667118, "learning_rate": 3.2580930703085485e-05, "loss": 0.3041, "num_tokens": 693586457.0, "step": 3635 }, { "epoch": 1.2409967571257894, "grad_norm": 0.22854742675097317, "learning_rate": 3.257460799190693e-05, "loss": 0.2827, "num_tokens": 693796378.0, "step": 3636 }, { "epoch": 1.2413381123058542, "grad_norm": 0.2261568009982867, "learning_rate": 3.256828528072838e-05, "loss": 0.3034, "num_tokens": 693974346.0, "step": 3637 }, { "epoch": 1.241679467485919, "grad_norm": 0.21940174499831006, "learning_rate": 3.2561962569549826e-05, "loss": 0.3067, "num_tokens": 694176618.0, "step": 3638 }, { "epoch": 1.242020822665984, "grad_norm": 0.2349322005122278, "learning_rate": 3.255563985837127e-05, "loss": 0.2909, "num_tokens": 694352554.0, "step": 3639 }, { "epoch": 1.2423621778460487, "grad_norm": 0.22563753409787027, "learning_rate": 3.254931714719272e-05, "loss": 0.3042, "num_tokens": 694530872.0, "step": 3640 }, { "epoch": 1.2427035330261136, "grad_norm": 0.21126525253780526, "learning_rate": 3.254299443601416e-05, "loss": 0.334, "num_tokens": 694764284.0, "step": 3641 }, { "epoch": 1.2430448882061786, "grad_norm": 0.22168555037228482, "learning_rate": 3.253667172483561e-05, "loss": 0.289, "num_tokens": 694935672.0, "step": 3642 }, { "epoch": 1.2433862433862433, "grad_norm": 0.18325750788930834, "learning_rate": 3.2530349013657054e-05, "loss": 0.2998, "num_tokens": 695151881.0, "step": 3643 }, { "epoch": 1.2437275985663083, "grad_norm": 0.2240520188315442, "learning_rate": 3.252402630247851e-05, "loss": 0.3041, "num_tokens": 695363091.0, "step": 3644 }, { "epoch": 1.2440689537463732, "grad_norm": 0.19340737398975785, "learning_rate": 3.2517703591299955e-05, "loss": 0.3185, "num_tokens": 695555378.0, "step": 3645 }, { "epoch": 1.244410308926438, "grad_norm": 0.2223586742174555, "learning_rate": 3.25113808801214e-05, "loss": 0.2973, "num_tokens": 695743935.0, "step": 3646 }, { "epoch": 1.2447516641065028, "grad_norm": 0.20376707779958889, "learning_rate": 3.250505816894284e-05, "loss": 0.2833, "num_tokens": 695973008.0, "step": 3647 }, { "epoch": 1.2450930192865677, "grad_norm": 0.1931996208177284, "learning_rate": 3.249873545776429e-05, "loss": 0.3148, "num_tokens": 696179958.0, "step": 3648 }, { "epoch": 1.2454343744666325, "grad_norm": 0.2160474669357375, "learning_rate": 3.2492412746585736e-05, "loss": 0.2918, "num_tokens": 696351283.0, "step": 3649 }, { "epoch": 1.2457757296466974, "grad_norm": 0.23491294865683268, "learning_rate": 3.248609003540718e-05, "loss": 0.3048, "num_tokens": 696545156.0, "step": 3650 }, { "epoch": 1.2461170848267622, "grad_norm": 0.19035803504082288, "learning_rate": 3.247976732422863e-05, "loss": 0.3001, "num_tokens": 696732190.0, "step": 3651 }, { "epoch": 1.246458440006827, "grad_norm": 0.251519211588611, "learning_rate": 3.247344461305008e-05, "loss": 0.3184, "num_tokens": 696925336.0, "step": 3652 }, { "epoch": 1.2467997951868919, "grad_norm": 0.2145296331427125, "learning_rate": 3.2467121901871524e-05, "loss": 0.2576, "num_tokens": 697090586.0, "step": 3653 }, { "epoch": 1.2471411503669567, "grad_norm": 0.23933047345987804, "learning_rate": 3.246079919069297e-05, "loss": 0.3259, "num_tokens": 697280464.0, "step": 3654 }, { "epoch": 1.2474825055470218, "grad_norm": 0.228318129066503, "learning_rate": 3.245447647951442e-05, "loss": 0.2791, "num_tokens": 697457571.0, "step": 3655 }, { "epoch": 1.2478238607270866, "grad_norm": 0.21280655439885554, "learning_rate": 3.2448153768335865e-05, "loss": 0.306, "num_tokens": 697682282.0, "step": 3656 }, { "epoch": 1.2481652159071515, "grad_norm": 0.2141277020236058, "learning_rate": 3.244183105715731e-05, "loss": 0.2977, "num_tokens": 697846911.0, "step": 3657 }, { "epoch": 1.2485065710872163, "grad_norm": 0.21408561397333628, "learning_rate": 3.243550834597875e-05, "loss": 0.2974, "num_tokens": 698033270.0, "step": 3658 }, { "epoch": 1.2488479262672811, "grad_norm": 0.21543972103321538, "learning_rate": 3.24291856348002e-05, "loss": 0.2742, "num_tokens": 698191451.0, "step": 3659 }, { "epoch": 1.249189281447346, "grad_norm": 0.2377145515523135, "learning_rate": 3.242286292362165e-05, "loss": 0.2725, "num_tokens": 698358345.0, "step": 3660 }, { "epoch": 1.2495306366274108, "grad_norm": 0.22774388693736136, "learning_rate": 3.24165402124431e-05, "loss": 0.308, "num_tokens": 698559140.0, "step": 3661 }, { "epoch": 1.2498719918074757, "grad_norm": 0.2201866696403029, "learning_rate": 3.241021750126455e-05, "loss": 0.3115, "num_tokens": 698739201.0, "step": 3662 }, { "epoch": 1.2502133469875405, "grad_norm": 0.23887774054050473, "learning_rate": 3.2403894790085994e-05, "loss": 0.3103, "num_tokens": 698953061.0, "step": 3663 }, { "epoch": 1.2505547021676053, "grad_norm": 0.19699787347677625, "learning_rate": 3.2397572078907434e-05, "loss": 0.2882, "num_tokens": 699128699.0, "step": 3664 }, { "epoch": 1.2508960573476702, "grad_norm": 0.23477834770994332, "learning_rate": 3.239124936772888e-05, "loss": 0.2772, "num_tokens": 699275187.0, "step": 3665 }, { "epoch": 1.2512374125277352, "grad_norm": 0.24018301937659425, "learning_rate": 3.238492665655033e-05, "loss": 0.2989, "num_tokens": 699448737.0, "step": 3666 }, { "epoch": 1.2515787677077999, "grad_norm": 0.24644899586370983, "learning_rate": 3.2378603945371775e-05, "loss": 0.3464, "num_tokens": 699690443.0, "step": 3667 }, { "epoch": 1.251920122887865, "grad_norm": 0.24185310647662514, "learning_rate": 3.237228123419322e-05, "loss": 0.3033, "num_tokens": 699864045.0, "step": 3668 }, { "epoch": 1.2522614780679298, "grad_norm": 0.21780175239636965, "learning_rate": 3.236595852301467e-05, "loss": 0.3153, "num_tokens": 700084028.0, "step": 3669 }, { "epoch": 1.2526028332479946, "grad_norm": 0.21049497504942713, "learning_rate": 3.2359635811836116e-05, "loss": 0.3179, "num_tokens": 700281406.0, "step": 3670 }, { "epoch": 1.2529441884280594, "grad_norm": 0.27571029506624617, "learning_rate": 3.235331310065756e-05, "loss": 0.2846, "num_tokens": 700482418.0, "step": 3671 }, { "epoch": 1.2532855436081243, "grad_norm": 0.20897282887645216, "learning_rate": 3.234699038947901e-05, "loss": 0.3003, "num_tokens": 700668609.0, "step": 3672 }, { "epoch": 1.2536268987881891, "grad_norm": 0.2491549316602015, "learning_rate": 3.234066767830046e-05, "loss": 0.2848, "num_tokens": 700829541.0, "step": 3673 }, { "epoch": 1.253968253968254, "grad_norm": 0.2374943748688261, "learning_rate": 3.2334344967121904e-05, "loss": 0.2831, "num_tokens": 700994433.0, "step": 3674 }, { "epoch": 1.2543096091483188, "grad_norm": 0.19977203248548966, "learning_rate": 3.232802225594335e-05, "loss": 0.3221, "num_tokens": 701185666.0, "step": 3675 }, { "epoch": 1.2546509643283836, "grad_norm": 0.23892647653881682, "learning_rate": 3.232169954476479e-05, "loss": 0.2587, "num_tokens": 701356437.0, "step": 3676 }, { "epoch": 1.2549923195084485, "grad_norm": 0.20885428581072157, "learning_rate": 3.2315376833586245e-05, "loss": 0.3081, "num_tokens": 701574881.0, "step": 3677 }, { "epoch": 1.2553336746885133, "grad_norm": 0.23418472464359433, "learning_rate": 3.230905412240769e-05, "loss": 0.2906, "num_tokens": 701752177.0, "step": 3678 }, { "epoch": 1.2556750298685784, "grad_norm": 0.22216689304713533, "learning_rate": 3.230273141122914e-05, "loss": 0.3286, "num_tokens": 701992553.0, "step": 3679 }, { "epoch": 1.256016385048643, "grad_norm": 0.1992809928308879, "learning_rate": 3.2296408700050586e-05, "loss": 0.2985, "num_tokens": 702185291.0, "step": 3680 }, { "epoch": 1.256357740228708, "grad_norm": 0.23133256673242655, "learning_rate": 3.229008598887203e-05, "loss": 0.2974, "num_tokens": 702360738.0, "step": 3681 }, { "epoch": 1.256699095408773, "grad_norm": 0.21816273663212704, "learning_rate": 3.228376327769347e-05, "loss": 0.2903, "num_tokens": 702542507.0, "step": 3682 }, { "epoch": 1.2570404505888377, "grad_norm": 0.22915122137844696, "learning_rate": 3.227744056651492e-05, "loss": 0.303, "num_tokens": 702734244.0, "step": 3683 }, { "epoch": 1.2573818057689026, "grad_norm": 0.22964158937022633, "learning_rate": 3.227111785533637e-05, "loss": 0.3166, "num_tokens": 702933773.0, "step": 3684 }, { "epoch": 1.2577231609489674, "grad_norm": 0.24753509755411357, "learning_rate": 3.2264795144157814e-05, "loss": 0.3093, "num_tokens": 703139251.0, "step": 3685 }, { "epoch": 1.2580645161290323, "grad_norm": 0.2293126291939506, "learning_rate": 3.225847243297926e-05, "loss": 0.2921, "num_tokens": 703339328.0, "step": 3686 }, { "epoch": 1.258405871309097, "grad_norm": 0.23245601517558612, "learning_rate": 3.2252149721800715e-05, "loss": 0.3286, "num_tokens": 703540566.0, "step": 3687 }, { "epoch": 1.258747226489162, "grad_norm": 0.2427971955038666, "learning_rate": 3.2245827010622155e-05, "loss": 0.2914, "num_tokens": 703711009.0, "step": 3688 }, { "epoch": 1.2590885816692268, "grad_norm": 0.2510123183474062, "learning_rate": 3.22395042994436e-05, "loss": 0.3035, "num_tokens": 703898825.0, "step": 3689 }, { "epoch": 1.2594299368492916, "grad_norm": 0.3247275602003065, "learning_rate": 3.223318158826505e-05, "loss": 0.3163, "num_tokens": 704102639.0, "step": 3690 }, { "epoch": 1.2597712920293564, "grad_norm": 0.24772995544156765, "learning_rate": 3.2226858877086496e-05, "loss": 0.3086, "num_tokens": 704272822.0, "step": 3691 }, { "epoch": 1.2601126472094215, "grad_norm": 0.23701995838957385, "learning_rate": 3.222053616590794e-05, "loss": 0.3392, "num_tokens": 704526089.0, "step": 3692 }, { "epoch": 1.2604540023894861, "grad_norm": 0.18806034676156072, "learning_rate": 3.221421345472939e-05, "loss": 0.31, "num_tokens": 704735261.0, "step": 3693 }, { "epoch": 1.2607953575695512, "grad_norm": 0.23320387982865287, "learning_rate": 3.220789074355084e-05, "loss": 0.3162, "num_tokens": 704908579.0, "step": 3694 }, { "epoch": 1.261136712749616, "grad_norm": 0.2331557972148268, "learning_rate": 3.2201568032372284e-05, "loss": 0.2947, "num_tokens": 705113151.0, "step": 3695 }, { "epoch": 1.2614780679296809, "grad_norm": 0.21122632553345444, "learning_rate": 3.219524532119373e-05, "loss": 0.3532, "num_tokens": 705301546.0, "step": 3696 }, { "epoch": 1.2618194231097457, "grad_norm": 0.22790536575184675, "learning_rate": 3.218892261001518e-05, "loss": 0.2772, "num_tokens": 705475370.0, "step": 3697 }, { "epoch": 1.2621607782898105, "grad_norm": 0.23997432884403216, "learning_rate": 3.2182599898836625e-05, "loss": 0.3345, "num_tokens": 705663218.0, "step": 3698 }, { "epoch": 1.2625021334698754, "grad_norm": 0.21255153681529648, "learning_rate": 3.217627718765807e-05, "loss": 0.2951, "num_tokens": 705828694.0, "step": 3699 }, { "epoch": 1.2628434886499402, "grad_norm": 0.23314781960344147, "learning_rate": 3.216995447647951e-05, "loss": 0.2855, "num_tokens": 705990074.0, "step": 3700 }, { "epoch": 1.263184843830005, "grad_norm": 0.22358036333727596, "learning_rate": 3.216363176530096e-05, "loss": 0.3283, "num_tokens": 706163104.0, "step": 3701 }, { "epoch": 1.26352619901007, "grad_norm": 0.25721112033258764, "learning_rate": 3.2157309054122406e-05, "loss": 0.2999, "num_tokens": 706309565.0, "step": 3702 }, { "epoch": 1.263867554190135, "grad_norm": 0.23186709477469683, "learning_rate": 3.215098634294385e-05, "loss": 0.322, "num_tokens": 706535904.0, "step": 3703 }, { "epoch": 1.2642089093701996, "grad_norm": 0.2128206043035238, "learning_rate": 3.214466363176531e-05, "loss": 0.2983, "num_tokens": 706734140.0, "step": 3704 }, { "epoch": 1.2645502645502646, "grad_norm": 0.21379121901950254, "learning_rate": 3.2138340920586754e-05, "loss": 0.3101, "num_tokens": 706947590.0, "step": 3705 }, { "epoch": 1.2648916197303295, "grad_norm": 0.23438760516218315, "learning_rate": 3.2132018209408194e-05, "loss": 0.3269, "num_tokens": 707111204.0, "step": 3706 }, { "epoch": 1.2652329749103943, "grad_norm": 0.22840174308808775, "learning_rate": 3.212569549822964e-05, "loss": 0.2797, "num_tokens": 707292318.0, "step": 3707 }, { "epoch": 1.2655743300904592, "grad_norm": 0.2493388488468107, "learning_rate": 3.211937278705109e-05, "loss": 0.3134, "num_tokens": 707450770.0, "step": 3708 }, { "epoch": 1.265915685270524, "grad_norm": 0.2244804435047141, "learning_rate": 3.2113050075872535e-05, "loss": 0.2934, "num_tokens": 707677711.0, "step": 3709 }, { "epoch": 1.2662570404505888, "grad_norm": 0.23291406305203832, "learning_rate": 3.210672736469398e-05, "loss": 0.2802, "num_tokens": 707838731.0, "step": 3710 }, { "epoch": 1.2665983956306537, "grad_norm": 0.25045738485338787, "learning_rate": 3.210040465351543e-05, "loss": 0.3317, "num_tokens": 708026151.0, "step": 3711 }, { "epoch": 1.2669397508107185, "grad_norm": 0.2773785033554573, "learning_rate": 3.2094081942336876e-05, "loss": 0.2977, "num_tokens": 708243355.0, "step": 3712 }, { "epoch": 1.2672811059907834, "grad_norm": 0.21901896273651175, "learning_rate": 3.208775923115832e-05, "loss": 0.3069, "num_tokens": 708457075.0, "step": 3713 }, { "epoch": 1.2676224611708482, "grad_norm": 0.23145802889488296, "learning_rate": 3.208143651997977e-05, "loss": 0.3092, "num_tokens": 708663863.0, "step": 3714 }, { "epoch": 1.267963816350913, "grad_norm": 0.2503666307596784, "learning_rate": 3.207511380880122e-05, "loss": 0.3287, "num_tokens": 708893632.0, "step": 3715 }, { "epoch": 1.268305171530978, "grad_norm": 0.2301476003241163, "learning_rate": 3.2068791097622664e-05, "loss": 0.3073, "num_tokens": 709070272.0, "step": 3716 }, { "epoch": 1.2686465267110427, "grad_norm": 0.214007009873727, "learning_rate": 3.206246838644411e-05, "loss": 0.3086, "num_tokens": 709252032.0, "step": 3717 }, { "epoch": 1.2689878818911078, "grad_norm": 0.23833756024908556, "learning_rate": 3.205614567526555e-05, "loss": 0.2884, "num_tokens": 709466456.0, "step": 3718 }, { "epoch": 1.2693292370711726, "grad_norm": 0.2012105257752129, "learning_rate": 3.2049822964087e-05, "loss": 0.3019, "num_tokens": 709647375.0, "step": 3719 }, { "epoch": 1.2696705922512375, "grad_norm": 0.24485544408456225, "learning_rate": 3.2043500252908445e-05, "loss": 0.2926, "num_tokens": 709804451.0, "step": 3720 }, { "epoch": 1.2700119474313023, "grad_norm": 0.2472208335961351, "learning_rate": 3.20371775417299e-05, "loss": 0.3065, "num_tokens": 709997392.0, "step": 3721 }, { "epoch": 1.2703533026113671, "grad_norm": 0.20447373430574528, "learning_rate": 3.2030854830551346e-05, "loss": 0.3034, "num_tokens": 710183964.0, "step": 3722 }, { "epoch": 1.270694657791432, "grad_norm": 0.5801632194834543, "learning_rate": 3.202453211937279e-05, "loss": 0.3199, "num_tokens": 710340272.0, "step": 3723 }, { "epoch": 1.2710360129714968, "grad_norm": 0.24550880686964818, "learning_rate": 3.201820940819423e-05, "loss": 0.2781, "num_tokens": 710525837.0, "step": 3724 }, { "epoch": 1.2713773681515617, "grad_norm": 0.21444281863542436, "learning_rate": 3.201188669701568e-05, "loss": 0.3437, "num_tokens": 710740602.0, "step": 3725 }, { "epoch": 1.2717187233316265, "grad_norm": 0.2186930500356881, "learning_rate": 3.200556398583713e-05, "loss": 0.3115, "num_tokens": 710902808.0, "step": 3726 }, { "epoch": 1.2720600785116913, "grad_norm": 0.24611313907564114, "learning_rate": 3.1999241274658574e-05, "loss": 0.2921, "num_tokens": 711095852.0, "step": 3727 }, { "epoch": 1.2724014336917562, "grad_norm": 0.2078910408305269, "learning_rate": 3.199291856348002e-05, "loss": 0.2861, "num_tokens": 711282088.0, "step": 3728 }, { "epoch": 1.2727427888718212, "grad_norm": 0.23831994073342827, "learning_rate": 3.198659585230147e-05, "loss": 0.2771, "num_tokens": 711443428.0, "step": 3729 }, { "epoch": 1.2730841440518859, "grad_norm": 0.20777613869174522, "learning_rate": 3.1980273141122915e-05, "loss": 0.3036, "num_tokens": 711656491.0, "step": 3730 }, { "epoch": 1.273425499231951, "grad_norm": 0.2122882231906737, "learning_rate": 3.197395042994436e-05, "loss": 0.3247, "num_tokens": 711859136.0, "step": 3731 }, { "epoch": 1.2737668544120158, "grad_norm": 0.23444382086757717, "learning_rate": 3.196762771876581e-05, "loss": 0.3636, "num_tokens": 712040946.0, "step": 3732 }, { "epoch": 1.2741082095920806, "grad_norm": 0.6845258258589578, "learning_rate": 3.1961305007587256e-05, "loss": 0.2834, "num_tokens": 712247633.0, "step": 3733 }, { "epoch": 1.2744495647721454, "grad_norm": 0.22386956501348185, "learning_rate": 3.19549822964087e-05, "loss": 0.2589, "num_tokens": 712435165.0, "step": 3734 }, { "epoch": 1.2747909199522103, "grad_norm": 0.23628477336041004, "learning_rate": 3.194865958523015e-05, "loss": 0.3336, "num_tokens": 712651834.0, "step": 3735 }, { "epoch": 1.2751322751322751, "grad_norm": 0.2181718013955449, "learning_rate": 3.194233687405159e-05, "loss": 0.3073, "num_tokens": 712837113.0, "step": 3736 }, { "epoch": 1.27547363031234, "grad_norm": 0.22635454149685055, "learning_rate": 3.1936014162873044e-05, "loss": 0.3248, "num_tokens": 713066559.0, "step": 3737 }, { "epoch": 1.2758149854924048, "grad_norm": 0.24225281131562793, "learning_rate": 3.192969145169449e-05, "loss": 0.3114, "num_tokens": 713257983.0, "step": 3738 }, { "epoch": 1.2761563406724696, "grad_norm": 0.23669780711884716, "learning_rate": 3.192336874051594e-05, "loss": 0.3234, "num_tokens": 713475171.0, "step": 3739 }, { "epoch": 1.2764976958525347, "grad_norm": 0.25036233495545934, "learning_rate": 3.1917046029337385e-05, "loss": 0.2981, "num_tokens": 713691709.0, "step": 3740 }, { "epoch": 1.2768390510325993, "grad_norm": 0.1992940425016491, "learning_rate": 3.191072331815883e-05, "loss": 0.3041, "num_tokens": 713879348.0, "step": 3741 }, { "epoch": 1.2771804062126644, "grad_norm": 0.2868006984578069, "learning_rate": 3.190440060698027e-05, "loss": 0.3007, "num_tokens": 714052054.0, "step": 3742 }, { "epoch": 1.2775217613927292, "grad_norm": 0.2467500102188623, "learning_rate": 3.189807789580172e-05, "loss": 0.3425, "num_tokens": 714261448.0, "step": 3743 }, { "epoch": 1.277863116572794, "grad_norm": 0.23715700603213052, "learning_rate": 3.1891755184623166e-05, "loss": 0.2915, "num_tokens": 714429716.0, "step": 3744 }, { "epoch": 1.278204471752859, "grad_norm": 0.22469497918265924, "learning_rate": 3.188543247344461e-05, "loss": 0.3029, "num_tokens": 714592686.0, "step": 3745 }, { "epoch": 1.2785458269329237, "grad_norm": 0.267422142949784, "learning_rate": 3.187910976226606e-05, "loss": 0.3124, "num_tokens": 714784355.0, "step": 3746 }, { "epoch": 1.2788871821129886, "grad_norm": 0.26493540392093273, "learning_rate": 3.1872787051087513e-05, "loss": 0.3066, "num_tokens": 714981441.0, "step": 3747 }, { "epoch": 1.2792285372930534, "grad_norm": 0.19663039251120107, "learning_rate": 3.1866464339908954e-05, "loss": 0.3105, "num_tokens": 715192334.0, "step": 3748 }, { "epoch": 1.2795698924731183, "grad_norm": 0.22677479167535108, "learning_rate": 3.18601416287304e-05, "loss": 0.291, "num_tokens": 715391681.0, "step": 3749 }, { "epoch": 1.279911247653183, "grad_norm": 0.2285085478093081, "learning_rate": 3.185381891755185e-05, "loss": 0.3003, "num_tokens": 715580585.0, "step": 3750 }, { "epoch": 1.280252602833248, "grad_norm": 0.212243088776987, "learning_rate": 3.1847496206373295e-05, "loss": 0.3254, "num_tokens": 715780825.0, "step": 3751 }, { "epoch": 1.2805939580133128, "grad_norm": 0.21731282710352878, "learning_rate": 3.184117349519474e-05, "loss": 0.2916, "num_tokens": 715995482.0, "step": 3752 }, { "epoch": 1.2809353131933778, "grad_norm": 0.21260135383265885, "learning_rate": 3.183485078401619e-05, "loss": 0.2909, "num_tokens": 716185797.0, "step": 3753 }, { "epoch": 1.2812766683734425, "grad_norm": 0.2580202187996164, "learning_rate": 3.1828528072837636e-05, "loss": 0.3158, "num_tokens": 716347426.0, "step": 3754 }, { "epoch": 1.2816180235535075, "grad_norm": 0.22258603505024946, "learning_rate": 3.182220536165908e-05, "loss": 0.3077, "num_tokens": 716517070.0, "step": 3755 }, { "epoch": 1.2819593787335724, "grad_norm": 0.23903914331891135, "learning_rate": 3.181588265048053e-05, "loss": 0.3182, "num_tokens": 716705805.0, "step": 3756 }, { "epoch": 1.2823007339136372, "grad_norm": 0.2127898226354772, "learning_rate": 3.1809559939301977e-05, "loss": 0.2846, "num_tokens": 716896899.0, "step": 3757 }, { "epoch": 1.282642089093702, "grad_norm": 0.23062216195898957, "learning_rate": 3.1803237228123424e-05, "loss": 0.3149, "num_tokens": 717093385.0, "step": 3758 }, { "epoch": 1.2829834442737669, "grad_norm": 0.2550363820426884, "learning_rate": 3.179691451694487e-05, "loss": 0.2981, "num_tokens": 717253985.0, "step": 3759 }, { "epoch": 1.2833247994538317, "grad_norm": 0.20953479498187202, "learning_rate": 3.179059180576631e-05, "loss": 0.3123, "num_tokens": 717448749.0, "step": 3760 }, { "epoch": 1.2836661546338966, "grad_norm": 0.2295411093935504, "learning_rate": 3.178426909458776e-05, "loss": 0.338, "num_tokens": 717602408.0, "step": 3761 }, { "epoch": 1.2840075098139614, "grad_norm": 0.2622816807123391, "learning_rate": 3.1777946383409205e-05, "loss": 0.3394, "num_tokens": 717825119.0, "step": 3762 }, { "epoch": 1.2843488649940262, "grad_norm": 0.229775447647252, "learning_rate": 3.177162367223065e-05, "loss": 0.3072, "num_tokens": 718005936.0, "step": 3763 }, { "epoch": 1.284690220174091, "grad_norm": 0.23239780174063535, "learning_rate": 3.1765300961052105e-05, "loss": 0.3081, "num_tokens": 718222366.0, "step": 3764 }, { "epoch": 1.285031575354156, "grad_norm": 0.2221491498561401, "learning_rate": 3.175897824987355e-05, "loss": 0.3202, "num_tokens": 718484018.0, "step": 3765 }, { "epoch": 1.285372930534221, "grad_norm": 0.2193377673842512, "learning_rate": 3.175265553869499e-05, "loss": 0.3111, "num_tokens": 718681841.0, "step": 3766 }, { "epoch": 1.2857142857142856, "grad_norm": 0.22983146763184556, "learning_rate": 3.174633282751644e-05, "loss": 0.3221, "num_tokens": 718868125.0, "step": 3767 }, { "epoch": 1.2860556408943506, "grad_norm": 0.22572657267518415, "learning_rate": 3.1740010116337887e-05, "loss": 0.2846, "num_tokens": 719058046.0, "step": 3768 }, { "epoch": 1.2863969960744155, "grad_norm": 0.21674247310373976, "learning_rate": 3.1733687405159334e-05, "loss": 0.2857, "num_tokens": 719261030.0, "step": 3769 }, { "epoch": 1.2867383512544803, "grad_norm": 0.23539082970248282, "learning_rate": 3.172736469398078e-05, "loss": 0.2794, "num_tokens": 719425765.0, "step": 3770 }, { "epoch": 1.2870797064345452, "grad_norm": 0.25197083305393, "learning_rate": 3.172104198280223e-05, "loss": 0.2802, "num_tokens": 719578634.0, "step": 3771 }, { "epoch": 1.28742106161461, "grad_norm": 0.22745850390218808, "learning_rate": 3.1714719271623675e-05, "loss": 0.3234, "num_tokens": 719784555.0, "step": 3772 }, { "epoch": 1.2877624167946748, "grad_norm": 0.21208910404188275, "learning_rate": 3.170839656044512e-05, "loss": 0.2965, "num_tokens": 719955586.0, "step": 3773 }, { "epoch": 1.2881037719747397, "grad_norm": 0.25577311393025604, "learning_rate": 3.170207384926657e-05, "loss": 0.3481, "num_tokens": 720163963.0, "step": 3774 }, { "epoch": 1.2884451271548045, "grad_norm": 0.20570597098506832, "learning_rate": 3.1695751138088015e-05, "loss": 0.3303, "num_tokens": 720356814.0, "step": 3775 }, { "epoch": 1.2887864823348694, "grad_norm": 0.24077258030748055, "learning_rate": 3.168942842690946e-05, "loss": 0.3151, "num_tokens": 720535293.0, "step": 3776 }, { "epoch": 1.2891278375149344, "grad_norm": 0.3778536421596232, "learning_rate": 3.168310571573091e-05, "loss": 0.267, "num_tokens": 720746307.0, "step": 3777 }, { "epoch": 1.289469192694999, "grad_norm": 0.19425878634021257, "learning_rate": 3.167678300455235e-05, "loss": 0.3236, "num_tokens": 720914178.0, "step": 3778 }, { "epoch": 1.289810547875064, "grad_norm": 0.2773233447617896, "learning_rate": 3.1670460293373797e-05, "loss": 0.3089, "num_tokens": 721161642.0, "step": 3779 }, { "epoch": 1.2901519030551287, "grad_norm": 0.2220924669786457, "learning_rate": 3.1664137582195244e-05, "loss": 0.3003, "num_tokens": 721322207.0, "step": 3780 }, { "epoch": 1.2904932582351938, "grad_norm": 0.2002067183230622, "learning_rate": 3.16578148710167e-05, "loss": 0.3233, "num_tokens": 721509816.0, "step": 3781 }, { "epoch": 1.2908346134152586, "grad_norm": 0.2831302452419496, "learning_rate": 3.1651492159838144e-05, "loss": 0.2989, "num_tokens": 721677080.0, "step": 3782 }, { "epoch": 1.2911759685953235, "grad_norm": 0.19842223223791086, "learning_rate": 3.1645169448659585e-05, "loss": 0.2946, "num_tokens": 721849255.0, "step": 3783 }, { "epoch": 1.2915173237753883, "grad_norm": 0.2161838555603592, "learning_rate": 3.163884673748103e-05, "loss": 0.2985, "num_tokens": 722038668.0, "step": 3784 }, { "epoch": 1.2918586789554531, "grad_norm": 0.2526363425213956, "learning_rate": 3.163252402630248e-05, "loss": 0.2893, "num_tokens": 722219735.0, "step": 3785 }, { "epoch": 1.292200034135518, "grad_norm": 0.2215881285634104, "learning_rate": 3.1626201315123925e-05, "loss": 0.3241, "num_tokens": 722384800.0, "step": 3786 }, { "epoch": 1.2925413893155828, "grad_norm": 0.24771242671732796, "learning_rate": 3.161987860394537e-05, "loss": 0.3104, "num_tokens": 722559782.0, "step": 3787 }, { "epoch": 1.2928827444956477, "grad_norm": 0.24941996825938423, "learning_rate": 3.161355589276682e-05, "loss": 0.3297, "num_tokens": 722780876.0, "step": 3788 }, { "epoch": 1.2932240996757125, "grad_norm": 0.21463855460846437, "learning_rate": 3.1607233181588266e-05, "loss": 0.3393, "num_tokens": 723000581.0, "step": 3789 }, { "epoch": 1.2935654548557776, "grad_norm": 0.22220387493794516, "learning_rate": 3.1600910470409713e-05, "loss": 0.3338, "num_tokens": 723201674.0, "step": 3790 }, { "epoch": 1.2939068100358422, "grad_norm": 0.22809268304215624, "learning_rate": 3.159458775923116e-05, "loss": 0.3128, "num_tokens": 723425154.0, "step": 3791 }, { "epoch": 1.2942481652159072, "grad_norm": 0.23076609869783998, "learning_rate": 3.158826504805261e-05, "loss": 0.3008, "num_tokens": 723608532.0, "step": 3792 }, { "epoch": 1.294589520395972, "grad_norm": 0.22504609512162546, "learning_rate": 3.1581942336874054e-05, "loss": 0.2832, "num_tokens": 723778904.0, "step": 3793 }, { "epoch": 1.294930875576037, "grad_norm": 0.21338273680159495, "learning_rate": 3.15756196256955e-05, "loss": 0.3032, "num_tokens": 724007473.0, "step": 3794 }, { "epoch": 1.2952722307561018, "grad_norm": 0.220278251397385, "learning_rate": 3.156929691451694e-05, "loss": 0.3385, "num_tokens": 724205046.0, "step": 3795 }, { "epoch": 1.2956135859361666, "grad_norm": 0.20017846930339722, "learning_rate": 3.156297420333839e-05, "loss": 0.3257, "num_tokens": 724408444.0, "step": 3796 }, { "epoch": 1.2959549411162314, "grad_norm": 0.2269697979175376, "learning_rate": 3.1556651492159836e-05, "loss": 0.3039, "num_tokens": 724603613.0, "step": 3797 }, { "epoch": 1.2962962962962963, "grad_norm": 0.24073179340885537, "learning_rate": 3.155032878098129e-05, "loss": 0.3241, "num_tokens": 724811748.0, "step": 3798 }, { "epoch": 1.2966376514763611, "grad_norm": 0.19341451090020378, "learning_rate": 3.1544006069802736e-05, "loss": 0.3294, "num_tokens": 725004341.0, "step": 3799 }, { "epoch": 1.296979006656426, "grad_norm": 0.23115869728146812, "learning_rate": 3.153768335862418e-05, "loss": 0.3271, "num_tokens": 725169439.0, "step": 3800 }, { "epoch": 1.2973203618364908, "grad_norm": 0.23513732457698075, "learning_rate": 3.1531360647445623e-05, "loss": 0.3164, "num_tokens": 725373219.0, "step": 3801 }, { "epoch": 1.2976617170165556, "grad_norm": 0.2157012281119993, "learning_rate": 3.152503793626707e-05, "loss": 0.3151, "num_tokens": 725579054.0, "step": 3802 }, { "epoch": 1.2980030721966207, "grad_norm": 0.21857469749492558, "learning_rate": 3.151871522508852e-05, "loss": 0.3116, "num_tokens": 725763635.0, "step": 3803 }, { "epoch": 1.2983444273766853, "grad_norm": 0.20773303730730805, "learning_rate": 3.1512392513909964e-05, "loss": 0.3289, "num_tokens": 726000951.0, "step": 3804 }, { "epoch": 1.2986857825567504, "grad_norm": 0.22269865982442374, "learning_rate": 3.150606980273141e-05, "loss": 0.3084, "num_tokens": 726183730.0, "step": 3805 }, { "epoch": 1.2990271377368152, "grad_norm": 0.23036028603170242, "learning_rate": 3.149974709155286e-05, "loss": 0.3084, "num_tokens": 726403028.0, "step": 3806 }, { "epoch": 1.29936849291688, "grad_norm": 0.19760230779973803, "learning_rate": 3.1493424380374305e-05, "loss": 0.3068, "num_tokens": 726591027.0, "step": 3807 }, { "epoch": 1.299709848096945, "grad_norm": 0.21500840693930423, "learning_rate": 3.148710166919575e-05, "loss": 0.3372, "num_tokens": 726808478.0, "step": 3808 }, { "epoch": 1.3000512032770097, "grad_norm": 0.24115531967304674, "learning_rate": 3.14807789580172e-05, "loss": 0.3213, "num_tokens": 726978424.0, "step": 3809 }, { "epoch": 1.3003925584570746, "grad_norm": 0.24192450461534862, "learning_rate": 3.1474456246838646e-05, "loss": 0.3091, "num_tokens": 727132182.0, "step": 3810 }, { "epoch": 1.3007339136371394, "grad_norm": 0.27455831163265737, "learning_rate": 3.146813353566009e-05, "loss": 0.3271, "num_tokens": 727304954.0, "step": 3811 }, { "epoch": 1.3010752688172043, "grad_norm": 0.22202029746837262, "learning_rate": 3.146181082448154e-05, "loss": 0.3101, "num_tokens": 727442831.0, "step": 3812 }, { "epoch": 1.301416623997269, "grad_norm": 0.254610620114, "learning_rate": 3.145548811330298e-05, "loss": 0.3072, "num_tokens": 727638271.0, "step": 3813 }, { "epoch": 1.301757979177334, "grad_norm": 0.22503269914247906, "learning_rate": 3.1449165402124434e-05, "loss": 0.3176, "num_tokens": 727844835.0, "step": 3814 }, { "epoch": 1.3020993343573988, "grad_norm": 0.2142428975986647, "learning_rate": 3.144284269094588e-05, "loss": 0.3687, "num_tokens": 728032486.0, "step": 3815 }, { "epoch": 1.3024406895374638, "grad_norm": 0.2613931050993997, "learning_rate": 3.143651997976733e-05, "loss": 0.2906, "num_tokens": 728207595.0, "step": 3816 }, { "epoch": 1.3027820447175285, "grad_norm": 0.2506892779528119, "learning_rate": 3.1430197268588775e-05, "loss": 0.3059, "num_tokens": 728393088.0, "step": 3817 }, { "epoch": 1.3031233998975935, "grad_norm": 0.213075286691827, "learning_rate": 3.142387455741022e-05, "loss": 0.292, "num_tokens": 728582463.0, "step": 3818 }, { "epoch": 1.3034647550776584, "grad_norm": 0.2293028402075883, "learning_rate": 3.141755184623166e-05, "loss": 0.282, "num_tokens": 728770286.0, "step": 3819 }, { "epoch": 1.3038061102577232, "grad_norm": 0.19953896176079455, "learning_rate": 3.141122913505311e-05, "loss": 0.2866, "num_tokens": 728982546.0, "step": 3820 }, { "epoch": 1.304147465437788, "grad_norm": 0.2206885288302626, "learning_rate": 3.1404906423874556e-05, "loss": 0.3065, "num_tokens": 729212584.0, "step": 3821 }, { "epoch": 1.3044888206178529, "grad_norm": 0.20424615170241445, "learning_rate": 3.1398583712696e-05, "loss": 0.3042, "num_tokens": 729408317.0, "step": 3822 }, { "epoch": 1.3048301757979177, "grad_norm": 0.22277426812304424, "learning_rate": 3.139226100151745e-05, "loss": 0.3177, "num_tokens": 729579044.0, "step": 3823 }, { "epoch": 1.3051715309779826, "grad_norm": 0.26714083556030405, "learning_rate": 3.1385938290338904e-05, "loss": 0.2982, "num_tokens": 729744190.0, "step": 3824 }, { "epoch": 1.3055128861580474, "grad_norm": 0.27837596629874733, "learning_rate": 3.1379615579160344e-05, "loss": 0.3146, "num_tokens": 729926963.0, "step": 3825 }, { "epoch": 1.3058542413381122, "grad_norm": 0.22594833160440628, "learning_rate": 3.137329286798179e-05, "loss": 0.3106, "num_tokens": 730142342.0, "step": 3826 }, { "epoch": 1.3061955965181773, "grad_norm": 0.24724589866692817, "learning_rate": 3.136697015680324e-05, "loss": 0.3381, "num_tokens": 730353331.0, "step": 3827 }, { "epoch": 1.306536951698242, "grad_norm": 0.21867463280134344, "learning_rate": 3.1360647445624685e-05, "loss": 0.2961, "num_tokens": 730532687.0, "step": 3828 }, { "epoch": 1.306878306878307, "grad_norm": 0.2249255868481778, "learning_rate": 3.135432473444613e-05, "loss": 0.2847, "num_tokens": 730748630.0, "step": 3829 }, { "epoch": 1.3072196620583718, "grad_norm": 0.2314564955551673, "learning_rate": 3.134800202326758e-05, "loss": 0.3198, "num_tokens": 730904880.0, "step": 3830 }, { "epoch": 1.3075610172384367, "grad_norm": 0.23559819895859901, "learning_rate": 3.1341679312089026e-05, "loss": 0.2958, "num_tokens": 731079671.0, "step": 3831 }, { "epoch": 1.3079023724185015, "grad_norm": 0.23856724963852152, "learning_rate": 3.133535660091047e-05, "loss": 0.3194, "num_tokens": 731306235.0, "step": 3832 }, { "epoch": 1.3082437275985663, "grad_norm": 0.22934647270477873, "learning_rate": 3.132903388973192e-05, "loss": 0.3332, "num_tokens": 731504522.0, "step": 3833 }, { "epoch": 1.3085850827786312, "grad_norm": 0.21783491284494938, "learning_rate": 3.132271117855337e-05, "loss": 0.3298, "num_tokens": 731691017.0, "step": 3834 }, { "epoch": 1.308926437958696, "grad_norm": 0.2644632646401514, "learning_rate": 3.1316388467374814e-05, "loss": 0.2715, "num_tokens": 731854456.0, "step": 3835 }, { "epoch": 1.3092677931387608, "grad_norm": 0.19449919865148324, "learning_rate": 3.131006575619626e-05, "loss": 0.3023, "num_tokens": 732063105.0, "step": 3836 }, { "epoch": 1.3096091483188257, "grad_norm": 0.2808266714175757, "learning_rate": 3.13037430450177e-05, "loss": 0.3163, "num_tokens": 732214390.0, "step": 3837 }, { "epoch": 1.3099505034988905, "grad_norm": 0.2173638563368722, "learning_rate": 3.129742033383915e-05, "loss": 0.2798, "num_tokens": 732355471.0, "step": 3838 }, { "epoch": 1.3102918586789554, "grad_norm": 0.20201486444955655, "learning_rate": 3.1291097622660595e-05, "loss": 0.3375, "num_tokens": 732581558.0, "step": 3839 }, { "epoch": 1.3106332138590204, "grad_norm": 0.25670795961067344, "learning_rate": 3.128477491148204e-05, "loss": 0.3197, "num_tokens": 732773440.0, "step": 3840 }, { "epoch": 1.310974569039085, "grad_norm": 0.2248669968636468, "learning_rate": 3.1278452200303496e-05, "loss": 0.3049, "num_tokens": 732962807.0, "step": 3841 }, { "epoch": 1.31131592421915, "grad_norm": 0.19913181585354134, "learning_rate": 3.127212948912494e-05, "loss": 0.2865, "num_tokens": 733155060.0, "step": 3842 }, { "epoch": 1.311657279399215, "grad_norm": 0.2357048854301047, "learning_rate": 3.126580677794638e-05, "loss": 0.3371, "num_tokens": 733390199.0, "step": 3843 }, { "epoch": 1.3119986345792798, "grad_norm": 0.2098324576555251, "learning_rate": 3.125948406676783e-05, "loss": 0.3416, "num_tokens": 733581461.0, "step": 3844 }, { "epoch": 1.3123399897593446, "grad_norm": 0.24981563890842548, "learning_rate": 3.125316135558928e-05, "loss": 0.3062, "num_tokens": 733820508.0, "step": 3845 }, { "epoch": 1.3126813449394095, "grad_norm": 0.24175986771558056, "learning_rate": 3.1246838644410724e-05, "loss": 0.3063, "num_tokens": 733963639.0, "step": 3846 }, { "epoch": 1.3130227001194743, "grad_norm": 0.23331382303489756, "learning_rate": 3.124051593323217e-05, "loss": 0.2958, "num_tokens": 734140829.0, "step": 3847 }, { "epoch": 1.3133640552995391, "grad_norm": 0.22255352973078008, "learning_rate": 3.123419322205362e-05, "loss": 0.3184, "num_tokens": 734345746.0, "step": 3848 }, { "epoch": 1.313705410479604, "grad_norm": 0.2050278713155755, "learning_rate": 3.1227870510875065e-05, "loss": 0.3329, "num_tokens": 734563315.0, "step": 3849 }, { "epoch": 1.3140467656596688, "grad_norm": 0.21676564350331978, "learning_rate": 3.122154779969651e-05, "loss": 0.3225, "num_tokens": 734760953.0, "step": 3850 }, { "epoch": 1.3143881208397337, "grad_norm": 0.2406610201607442, "learning_rate": 3.121522508851796e-05, "loss": 0.3087, "num_tokens": 734958566.0, "step": 3851 }, { "epoch": 1.3147294760197985, "grad_norm": 0.21348440460628443, "learning_rate": 3.1208902377339406e-05, "loss": 0.2957, "num_tokens": 735114407.0, "step": 3852 }, { "epoch": 1.3150708311998636, "grad_norm": 0.24143799607295413, "learning_rate": 3.120257966616085e-05, "loss": 0.2984, "num_tokens": 735306822.0, "step": 3853 }, { "epoch": 1.3154121863799282, "grad_norm": 0.1981646857828532, "learning_rate": 3.11962569549823e-05, "loss": 0.3071, "num_tokens": 735468938.0, "step": 3854 }, { "epoch": 1.3157535415599932, "grad_norm": 0.2493092882571192, "learning_rate": 3.118993424380374e-05, "loss": 0.312, "num_tokens": 735640380.0, "step": 3855 }, { "epoch": 1.316094896740058, "grad_norm": 0.2278370312558036, "learning_rate": 3.118361153262519e-05, "loss": 0.3578, "num_tokens": 735865016.0, "step": 3856 }, { "epoch": 1.316436251920123, "grad_norm": 0.20307867924689096, "learning_rate": 3.1177288821446634e-05, "loss": 0.2871, "num_tokens": 736052500.0, "step": 3857 }, { "epoch": 1.3167776071001878, "grad_norm": 0.24439008031912823, "learning_rate": 3.117096611026809e-05, "loss": 0.3218, "num_tokens": 736263699.0, "step": 3858 }, { "epoch": 1.3171189622802526, "grad_norm": 0.22954617115854895, "learning_rate": 3.1164643399089535e-05, "loss": 0.3228, "num_tokens": 736477421.0, "step": 3859 }, { "epoch": 1.3174603174603174, "grad_norm": 0.22016145537004236, "learning_rate": 3.115832068791098e-05, "loss": 0.3139, "num_tokens": 736648254.0, "step": 3860 }, { "epoch": 1.3178016726403823, "grad_norm": 0.24415449347179183, "learning_rate": 3.115199797673242e-05, "loss": 0.3155, "num_tokens": 736814449.0, "step": 3861 }, { "epoch": 1.3181430278204471, "grad_norm": 0.2599705840589788, "learning_rate": 3.114567526555387e-05, "loss": 0.3224, "num_tokens": 736979480.0, "step": 3862 }, { "epoch": 1.318484383000512, "grad_norm": 0.19111965535146955, "learning_rate": 3.1139352554375316e-05, "loss": 0.3005, "num_tokens": 737177519.0, "step": 3863 }, { "epoch": 1.318825738180577, "grad_norm": 0.3910642763903475, "learning_rate": 3.113302984319676e-05, "loss": 0.3045, "num_tokens": 737393210.0, "step": 3864 }, { "epoch": 1.3191670933606416, "grad_norm": 0.22121344255604583, "learning_rate": 3.112670713201821e-05, "loss": 0.328, "num_tokens": 737581449.0, "step": 3865 }, { "epoch": 1.3195084485407067, "grad_norm": 0.20374778311734745, "learning_rate": 3.112038442083966e-05, "loss": 0.2991, "num_tokens": 737809232.0, "step": 3866 }, { "epoch": 1.3198498037207715, "grad_norm": 0.20563986938212966, "learning_rate": 3.1114061709661104e-05, "loss": 0.3298, "num_tokens": 738004404.0, "step": 3867 }, { "epoch": 1.3201911589008364, "grad_norm": 0.25116282189772837, "learning_rate": 3.110773899848255e-05, "loss": 0.3406, "num_tokens": 738219049.0, "step": 3868 }, { "epoch": 1.3205325140809012, "grad_norm": 0.2543959779465116, "learning_rate": 3.1101416287304e-05, "loss": 0.308, "num_tokens": 738425985.0, "step": 3869 }, { "epoch": 1.320873869260966, "grad_norm": 0.21719736895033703, "learning_rate": 3.1095093576125445e-05, "loss": 0.2876, "num_tokens": 738597497.0, "step": 3870 }, { "epoch": 1.321215224441031, "grad_norm": 0.24621638638263368, "learning_rate": 3.108877086494689e-05, "loss": 0.3612, "num_tokens": 738774750.0, "step": 3871 }, { "epoch": 1.3215565796210957, "grad_norm": 0.20921730387396978, "learning_rate": 3.108244815376834e-05, "loss": 0.2975, "num_tokens": 738979994.0, "step": 3872 }, { "epoch": 1.3218979348011606, "grad_norm": 0.2590549488388316, "learning_rate": 3.107612544258978e-05, "loss": 0.3266, "num_tokens": 739203189.0, "step": 3873 }, { "epoch": 1.3222392899812254, "grad_norm": 0.20244146995627277, "learning_rate": 3.1069802731411226e-05, "loss": 0.3224, "num_tokens": 739399859.0, "step": 3874 }, { "epoch": 1.3225806451612903, "grad_norm": 0.22742665590787345, "learning_rate": 3.106348002023268e-05, "loss": 0.3173, "num_tokens": 739568697.0, "step": 3875 }, { "epoch": 1.322922000341355, "grad_norm": 0.22884006668537532, "learning_rate": 3.105715730905413e-05, "loss": 0.2909, "num_tokens": 739729765.0, "step": 3876 }, { "epoch": 1.3232633555214202, "grad_norm": 0.217181164830258, "learning_rate": 3.1050834597875574e-05, "loss": 0.2997, "num_tokens": 739928574.0, "step": 3877 }, { "epoch": 1.3236047107014848, "grad_norm": 0.22712590775986002, "learning_rate": 3.104451188669702e-05, "loss": 0.3004, "num_tokens": 740096360.0, "step": 3878 }, { "epoch": 1.3239460658815498, "grad_norm": 0.2385677465016813, "learning_rate": 3.103818917551846e-05, "loss": 0.3759, "num_tokens": 740313580.0, "step": 3879 }, { "epoch": 1.3242874210616147, "grad_norm": 1.7206548943039244, "learning_rate": 3.103186646433991e-05, "loss": 0.3307, "num_tokens": 740548046.0, "step": 3880 }, { "epoch": 1.3246287762416795, "grad_norm": 0.23875801620454562, "learning_rate": 3.1025543753161355e-05, "loss": 0.3357, "num_tokens": 740746076.0, "step": 3881 }, { "epoch": 1.3249701314217444, "grad_norm": 0.2296032108332221, "learning_rate": 3.10192210419828e-05, "loss": 0.2869, "num_tokens": 740888805.0, "step": 3882 }, { "epoch": 1.3253114866018092, "grad_norm": 0.22971980484850735, "learning_rate": 3.101289833080425e-05, "loss": 0.3164, "num_tokens": 741092572.0, "step": 3883 }, { "epoch": 1.325652841781874, "grad_norm": 0.2548751554707363, "learning_rate": 3.10065756196257e-05, "loss": 0.3034, "num_tokens": 741280799.0, "step": 3884 }, { "epoch": 1.3259941969619389, "grad_norm": 0.29856191677005695, "learning_rate": 3.100025290844714e-05, "loss": 0.2703, "num_tokens": 741445410.0, "step": 3885 }, { "epoch": 1.3263355521420037, "grad_norm": 0.23950027014773162, "learning_rate": 3.099393019726859e-05, "loss": 0.2726, "num_tokens": 741610219.0, "step": 3886 }, { "epoch": 1.3266769073220686, "grad_norm": 0.18715762740532152, "learning_rate": 3.098760748609004e-05, "loss": 0.3073, "num_tokens": 741827893.0, "step": 3887 }, { "epoch": 1.3270182625021334, "grad_norm": 0.1924639471418851, "learning_rate": 3.0981284774911484e-05, "loss": 0.325, "num_tokens": 742082874.0, "step": 3888 }, { "epoch": 1.3273596176821982, "grad_norm": 0.20453464692984985, "learning_rate": 3.097496206373293e-05, "loss": 0.2769, "num_tokens": 742247800.0, "step": 3889 }, { "epoch": 1.3277009728622633, "grad_norm": 0.22349294770364253, "learning_rate": 3.096863935255438e-05, "loss": 0.3106, "num_tokens": 742434875.0, "step": 3890 }, { "epoch": 1.328042328042328, "grad_norm": 0.23650992108090693, "learning_rate": 3.0962316641375825e-05, "loss": 0.2976, "num_tokens": 742603848.0, "step": 3891 }, { "epoch": 1.328383683222393, "grad_norm": 0.3614523679068905, "learning_rate": 3.095599393019727e-05, "loss": 0.3156, "num_tokens": 742785936.0, "step": 3892 }, { "epoch": 1.3287250384024578, "grad_norm": 0.24071666176413847, "learning_rate": 3.094967121901872e-05, "loss": 0.297, "num_tokens": 742996131.0, "step": 3893 }, { "epoch": 1.3290663935825227, "grad_norm": 0.23240446088861877, "learning_rate": 3.0943348507840166e-05, "loss": 0.3287, "num_tokens": 743187339.0, "step": 3894 }, { "epoch": 1.3294077487625875, "grad_norm": 0.23136797016463967, "learning_rate": 3.093702579666161e-05, "loss": 0.3072, "num_tokens": 743370986.0, "step": 3895 }, { "epoch": 1.3297491039426523, "grad_norm": 0.22858978523812315, "learning_rate": 3.093070308548306e-05, "loss": 0.3324, "num_tokens": 743555274.0, "step": 3896 }, { "epoch": 1.3300904591227172, "grad_norm": 0.23170026782891445, "learning_rate": 3.09243803743045e-05, "loss": 0.3198, "num_tokens": 743769930.0, "step": 3897 }, { "epoch": 1.330431814302782, "grad_norm": 0.25203841879403277, "learning_rate": 3.091805766312595e-05, "loss": 0.3282, "num_tokens": 743931184.0, "step": 3898 }, { "epoch": 1.3307731694828469, "grad_norm": 0.27449724406338455, "learning_rate": 3.0911734951947394e-05, "loss": 0.3115, "num_tokens": 744112487.0, "step": 3899 }, { "epoch": 1.3311145246629117, "grad_norm": 0.2029771858060113, "learning_rate": 3.090541224076884e-05, "loss": 0.289, "num_tokens": 744309167.0, "step": 3900 }, { "epoch": 1.3314558798429768, "grad_norm": 0.21885784345087297, "learning_rate": 3.0899089529590295e-05, "loss": 0.3446, "num_tokens": 744489143.0, "step": 3901 }, { "epoch": 1.3317972350230414, "grad_norm": 0.2175099270518734, "learning_rate": 3.089276681841174e-05, "loss": 0.315, "num_tokens": 744689697.0, "step": 3902 }, { "epoch": 1.3321385902031064, "grad_norm": 0.23242150927277852, "learning_rate": 3.088644410723318e-05, "loss": 0.305, "num_tokens": 744934222.0, "step": 3903 }, { "epoch": 1.3324799453831713, "grad_norm": 0.21479512030729034, "learning_rate": 3.088012139605463e-05, "loss": 0.336, "num_tokens": 745096765.0, "step": 3904 }, { "epoch": 1.332821300563236, "grad_norm": 0.24846511510414993, "learning_rate": 3.0873798684876076e-05, "loss": 0.3187, "num_tokens": 745284905.0, "step": 3905 }, { "epoch": 1.333162655743301, "grad_norm": 0.22030607245711578, "learning_rate": 3.086747597369752e-05, "loss": 0.3343, "num_tokens": 745517568.0, "step": 3906 }, { "epoch": 1.3335040109233658, "grad_norm": 0.22280097938246082, "learning_rate": 3.086115326251897e-05, "loss": 0.3093, "num_tokens": 745691855.0, "step": 3907 }, { "epoch": 1.3338453661034306, "grad_norm": 0.21877082985527738, "learning_rate": 3.085483055134042e-05, "loss": 0.2941, "num_tokens": 745905455.0, "step": 3908 }, { "epoch": 1.3341867212834955, "grad_norm": 0.20880179187298742, "learning_rate": 3.0848507840161864e-05, "loss": 0.3071, "num_tokens": 746083052.0, "step": 3909 }, { "epoch": 1.3345280764635603, "grad_norm": 0.22554891238840172, "learning_rate": 3.084218512898331e-05, "loss": 0.2949, "num_tokens": 746281028.0, "step": 3910 }, { "epoch": 1.3348694316436251, "grad_norm": 0.20484459959833334, "learning_rate": 3.083586241780476e-05, "loss": 0.3239, "num_tokens": 746478305.0, "step": 3911 }, { "epoch": 1.33521078682369, "grad_norm": 0.22383773459166212, "learning_rate": 3.0829539706626205e-05, "loss": 0.3024, "num_tokens": 746678846.0, "step": 3912 }, { "epoch": 1.3355521420037548, "grad_norm": 0.22670942460463825, "learning_rate": 3.082321699544765e-05, "loss": 0.3043, "num_tokens": 746904422.0, "step": 3913 }, { "epoch": 1.33589349718382, "grad_norm": 0.19684201285489827, "learning_rate": 3.081689428426909e-05, "loss": 0.3199, "num_tokens": 747097014.0, "step": 3914 }, { "epoch": 1.3362348523638845, "grad_norm": 0.2456493379144563, "learning_rate": 3.081057157309054e-05, "loss": 0.3149, "num_tokens": 747290239.0, "step": 3915 }, { "epoch": 1.3365762075439496, "grad_norm": 0.23668338815865672, "learning_rate": 3.0804248861911986e-05, "loss": 0.3065, "num_tokens": 747461514.0, "step": 3916 }, { "epoch": 1.3369175627240144, "grad_norm": 0.2890156890429197, "learning_rate": 3.079792615073343e-05, "loss": 0.3234, "num_tokens": 747627256.0, "step": 3917 }, { "epoch": 1.3372589179040792, "grad_norm": 0.21295886607348136, "learning_rate": 3.0791603439554887e-05, "loss": 0.278, "num_tokens": 747794911.0, "step": 3918 }, { "epoch": 1.337600273084144, "grad_norm": 0.24408143163998083, "learning_rate": 3.0785280728376334e-05, "loss": 0.3567, "num_tokens": 747976757.0, "step": 3919 }, { "epoch": 1.337941628264209, "grad_norm": 0.23059149468873077, "learning_rate": 3.0778958017197774e-05, "loss": 0.3184, "num_tokens": 748179163.0, "step": 3920 }, { "epoch": 1.3382829834442738, "grad_norm": 0.20672598241769036, "learning_rate": 3.077263530601922e-05, "loss": 0.3347, "num_tokens": 748363928.0, "step": 3921 }, { "epoch": 1.3386243386243386, "grad_norm": 0.2311262122931047, "learning_rate": 3.076631259484067e-05, "loss": 0.311, "num_tokens": 748621555.0, "step": 3922 }, { "epoch": 1.3389656938044034, "grad_norm": 0.21914562877915233, "learning_rate": 3.0759989883662115e-05, "loss": 0.3039, "num_tokens": 748803667.0, "step": 3923 }, { "epoch": 1.3393070489844683, "grad_norm": 0.22982511624184873, "learning_rate": 3.075366717248356e-05, "loss": 0.3146, "num_tokens": 749003472.0, "step": 3924 }, { "epoch": 1.3396484041645331, "grad_norm": 0.19812835056984, "learning_rate": 3.074734446130501e-05, "loss": 0.3249, "num_tokens": 749239198.0, "step": 3925 }, { "epoch": 1.339989759344598, "grad_norm": 0.23377500158263548, "learning_rate": 3.0741021750126456e-05, "loss": 0.2945, "num_tokens": 749414182.0, "step": 3926 }, { "epoch": 1.340331114524663, "grad_norm": 0.23752990537063773, "learning_rate": 3.07346990389479e-05, "loss": 0.3056, "num_tokens": 749579263.0, "step": 3927 }, { "epoch": 1.3406724697047276, "grad_norm": 0.21167183904190648, "learning_rate": 3.072837632776935e-05, "loss": 0.2957, "num_tokens": 749757884.0, "step": 3928 }, { "epoch": 1.3410138248847927, "grad_norm": 0.4441854572692047, "learning_rate": 3.07220536165908e-05, "loss": 0.2948, "num_tokens": 749961956.0, "step": 3929 }, { "epoch": 1.3413551800648575, "grad_norm": 0.25498746900589253, "learning_rate": 3.0715730905412244e-05, "loss": 0.2968, "num_tokens": 750138130.0, "step": 3930 }, { "epoch": 1.3416965352449224, "grad_norm": 0.22910380619080745, "learning_rate": 3.070940819423369e-05, "loss": 0.3332, "num_tokens": 750321672.0, "step": 3931 }, { "epoch": 1.3420378904249872, "grad_norm": 0.21646421323420598, "learning_rate": 3.070308548305513e-05, "loss": 0.2847, "num_tokens": 750515168.0, "step": 3932 }, { "epoch": 1.342379245605052, "grad_norm": 0.23211019187415333, "learning_rate": 3.069676277187658e-05, "loss": 0.3122, "num_tokens": 750678017.0, "step": 3933 }, { "epoch": 1.342720600785117, "grad_norm": 0.5381936508056411, "learning_rate": 3.0690440060698025e-05, "loss": 0.2905, "num_tokens": 750864003.0, "step": 3934 }, { "epoch": 1.3430619559651817, "grad_norm": 0.2163702689258027, "learning_rate": 3.068411734951948e-05, "loss": 0.3042, "num_tokens": 751064403.0, "step": 3935 }, { "epoch": 1.3434033111452466, "grad_norm": 0.2730942253032862, "learning_rate": 3.0677794638340926e-05, "loss": 0.2958, "num_tokens": 751227777.0, "step": 3936 }, { "epoch": 1.3437446663253114, "grad_norm": 0.22095843467336762, "learning_rate": 3.067147192716237e-05, "loss": 0.317, "num_tokens": 751425637.0, "step": 3937 }, { "epoch": 1.3440860215053765, "grad_norm": 0.2294777015343135, "learning_rate": 3.066514921598381e-05, "loss": 0.307, "num_tokens": 751617423.0, "step": 3938 }, { "epoch": 1.344427376685441, "grad_norm": 0.2304342765608874, "learning_rate": 3.065882650480526e-05, "loss": 0.3107, "num_tokens": 751797808.0, "step": 3939 }, { "epoch": 1.3447687318655062, "grad_norm": 0.2369236668622721, "learning_rate": 3.065250379362671e-05, "loss": 0.3, "num_tokens": 751937216.0, "step": 3940 }, { "epoch": 1.3451100870455708, "grad_norm": 0.23541937165188948, "learning_rate": 3.0646181082448154e-05, "loss": 0.3058, "num_tokens": 752136032.0, "step": 3941 }, { "epoch": 1.3454514422256358, "grad_norm": 0.2397643134642863, "learning_rate": 3.06398583712696e-05, "loss": 0.3133, "num_tokens": 752339449.0, "step": 3942 }, { "epoch": 1.3457927974057007, "grad_norm": 0.2164694546896302, "learning_rate": 3.063353566009105e-05, "loss": 0.3458, "num_tokens": 752528787.0, "step": 3943 }, { "epoch": 1.3461341525857655, "grad_norm": 0.21789611123757344, "learning_rate": 3.0627212948912495e-05, "loss": 0.3266, "num_tokens": 752710471.0, "step": 3944 }, { "epoch": 1.3464755077658304, "grad_norm": 0.2465145016530291, "learning_rate": 3.062089023773394e-05, "loss": 0.3373, "num_tokens": 752911514.0, "step": 3945 }, { "epoch": 1.3468168629458952, "grad_norm": 0.23590923312015427, "learning_rate": 3.061456752655539e-05, "loss": 0.299, "num_tokens": 753090533.0, "step": 3946 }, { "epoch": 1.34715821812596, "grad_norm": 0.25728383632056967, "learning_rate": 3.0608244815376836e-05, "loss": 0.3153, "num_tokens": 753271462.0, "step": 3947 }, { "epoch": 1.3474995733060249, "grad_norm": 0.21841482297369683, "learning_rate": 3.060192210419828e-05, "loss": 0.3605, "num_tokens": 753510319.0, "step": 3948 }, { "epoch": 1.3478409284860897, "grad_norm": 0.23134369070318073, "learning_rate": 3.059559939301973e-05, "loss": 0.3046, "num_tokens": 753782305.0, "step": 3949 }, { "epoch": 1.3481822836661546, "grad_norm": 0.20544040926812224, "learning_rate": 3.058927668184117e-05, "loss": 0.307, "num_tokens": 753978860.0, "step": 3950 }, { "epoch": 1.3485236388462196, "grad_norm": 0.22916050373793004, "learning_rate": 3.058295397066262e-05, "loss": 0.2867, "num_tokens": 754156356.0, "step": 3951 }, { "epoch": 1.3488649940262842, "grad_norm": 0.2138064097143085, "learning_rate": 3.057663125948407e-05, "loss": 0.2891, "num_tokens": 754300363.0, "step": 3952 }, { "epoch": 1.3492063492063493, "grad_norm": 0.22774531629581396, "learning_rate": 3.057030854830552e-05, "loss": 0.2819, "num_tokens": 754452159.0, "step": 3953 }, { "epoch": 1.3495477043864141, "grad_norm": 0.2546061069560436, "learning_rate": 3.0563985837126964e-05, "loss": 0.2895, "num_tokens": 754638890.0, "step": 3954 }, { "epoch": 1.349889059566479, "grad_norm": 0.21214386026800341, "learning_rate": 3.055766312594841e-05, "loss": 0.3053, "num_tokens": 754878597.0, "step": 3955 }, { "epoch": 1.3502304147465438, "grad_norm": 0.20121975753482524, "learning_rate": 3.055134041476985e-05, "loss": 0.3159, "num_tokens": 755079259.0, "step": 3956 }, { "epoch": 1.3505717699266087, "grad_norm": 0.21966450374404278, "learning_rate": 3.05450177035913e-05, "loss": 0.3173, "num_tokens": 755263232.0, "step": 3957 }, { "epoch": 1.3509131251066735, "grad_norm": 0.2352373784757683, "learning_rate": 3.0538694992412746e-05, "loss": 0.3028, "num_tokens": 755459538.0, "step": 3958 }, { "epoch": 1.3512544802867383, "grad_norm": 0.23540614552919767, "learning_rate": 3.053237228123419e-05, "loss": 0.2921, "num_tokens": 755626647.0, "step": 3959 }, { "epoch": 1.3515958354668032, "grad_norm": 0.20992065998321377, "learning_rate": 3.052604957005564e-05, "loss": 0.3163, "num_tokens": 755820950.0, "step": 3960 }, { "epoch": 1.351937190646868, "grad_norm": 0.215031885307898, "learning_rate": 3.051972685887709e-05, "loss": 0.3068, "num_tokens": 756034540.0, "step": 3961 }, { "epoch": 1.3522785458269329, "grad_norm": 0.23490169687186963, "learning_rate": 3.0513404147698534e-05, "loss": 0.2871, "num_tokens": 756215906.0, "step": 3962 }, { "epoch": 1.3526199010069977, "grad_norm": 0.2396859707317909, "learning_rate": 3.050708143651998e-05, "loss": 0.3101, "num_tokens": 756415213.0, "step": 3963 }, { "epoch": 1.3529612561870628, "grad_norm": 0.20868435416067893, "learning_rate": 3.0500758725341427e-05, "loss": 0.2765, "num_tokens": 756562882.0, "step": 3964 }, { "epoch": 1.3533026113671274, "grad_norm": 0.23471178038235282, "learning_rate": 3.0494436014162874e-05, "loss": 0.293, "num_tokens": 756767271.0, "step": 3965 }, { "epoch": 1.3536439665471924, "grad_norm": 0.20919788560041985, "learning_rate": 3.048811330298432e-05, "loss": 0.3297, "num_tokens": 756939507.0, "step": 3966 }, { "epoch": 1.3539853217272573, "grad_norm": 0.22367770534119613, "learning_rate": 3.048179059180577e-05, "loss": 0.316, "num_tokens": 757150431.0, "step": 3967 }, { "epoch": 1.3543266769073221, "grad_norm": 0.265754909468408, "learning_rate": 3.0475467880627212e-05, "loss": 0.3095, "num_tokens": 757322046.0, "step": 3968 }, { "epoch": 1.354668032087387, "grad_norm": 0.24715388436584587, "learning_rate": 3.046914516944866e-05, "loss": 0.3347, "num_tokens": 757506878.0, "step": 3969 }, { "epoch": 1.3550093872674518, "grad_norm": 0.2197151048770177, "learning_rate": 3.0462822458270106e-05, "loss": 0.2916, "num_tokens": 757673369.0, "step": 3970 }, { "epoch": 1.3553507424475166, "grad_norm": 0.2359907685684337, "learning_rate": 3.0456499747091556e-05, "loss": 0.2977, "num_tokens": 757836728.0, "step": 3971 }, { "epoch": 1.3556920976275815, "grad_norm": 0.2430822338766219, "learning_rate": 3.0450177035913003e-05, "loss": 0.3295, "num_tokens": 758012632.0, "step": 3972 }, { "epoch": 1.3560334528076463, "grad_norm": 0.21630862664411765, "learning_rate": 3.044385432473445e-05, "loss": 0.2936, "num_tokens": 758182163.0, "step": 3973 }, { "epoch": 1.3563748079877112, "grad_norm": 0.22599284328792762, "learning_rate": 3.043753161355589e-05, "loss": 0.2974, "num_tokens": 758368423.0, "step": 3974 }, { "epoch": 1.356716163167776, "grad_norm": 0.2265448525940077, "learning_rate": 3.043120890237734e-05, "loss": 0.3609, "num_tokens": 758603116.0, "step": 3975 }, { "epoch": 1.3570575183478408, "grad_norm": 0.20620282712269664, "learning_rate": 3.0424886191198788e-05, "loss": 0.2664, "num_tokens": 758805142.0, "step": 3976 }, { "epoch": 1.357398873527906, "grad_norm": 0.2099889747999688, "learning_rate": 3.0418563480020235e-05, "loss": 0.2899, "num_tokens": 758981313.0, "step": 3977 }, { "epoch": 1.3577402287079705, "grad_norm": 0.2007984622949795, "learning_rate": 3.0412240768841682e-05, "loss": 0.3223, "num_tokens": 759179414.0, "step": 3978 }, { "epoch": 1.3580815838880356, "grad_norm": 0.20626318770149465, "learning_rate": 3.040591805766313e-05, "loss": 0.288, "num_tokens": 759351434.0, "step": 3979 }, { "epoch": 1.3584229390681004, "grad_norm": 0.23678401935423943, "learning_rate": 3.0399595346484572e-05, "loss": 0.3002, "num_tokens": 759549761.0, "step": 3980 }, { "epoch": 1.3587642942481652, "grad_norm": 0.20082267778235838, "learning_rate": 3.039327263530602e-05, "loss": 0.3141, "num_tokens": 759760127.0, "step": 3981 }, { "epoch": 1.35910564942823, "grad_norm": 0.20270855462676202, "learning_rate": 3.0386949924127466e-05, "loss": 0.2886, "num_tokens": 759911746.0, "step": 3982 }, { "epoch": 1.359447004608295, "grad_norm": 0.2343047646023638, "learning_rate": 3.0380627212948913e-05, "loss": 0.2966, "num_tokens": 760086864.0, "step": 3983 }, { "epoch": 1.3597883597883598, "grad_norm": 0.194312204134077, "learning_rate": 3.037430450177036e-05, "loss": 0.3285, "num_tokens": 760295778.0, "step": 3984 }, { "epoch": 1.3601297149684246, "grad_norm": 0.2502852981096746, "learning_rate": 3.036798179059181e-05, "loss": 0.2918, "num_tokens": 760477621.0, "step": 3985 }, { "epoch": 1.3604710701484894, "grad_norm": 0.23073516542836903, "learning_rate": 3.036165907941325e-05, "loss": 0.3213, "num_tokens": 760651403.0, "step": 3986 }, { "epoch": 1.3608124253285543, "grad_norm": 0.23763871601026862, "learning_rate": 3.0355336368234698e-05, "loss": 0.3358, "num_tokens": 760802174.0, "step": 3987 }, { "epoch": 1.3611537805086193, "grad_norm": 0.23543177646105634, "learning_rate": 3.0349013657056148e-05, "loss": 0.2795, "num_tokens": 760972158.0, "step": 3988 }, { "epoch": 1.361495135688684, "grad_norm": 0.23549133361727667, "learning_rate": 3.0342690945877595e-05, "loss": 0.3258, "num_tokens": 761168862.0, "step": 3989 }, { "epoch": 1.361836490868749, "grad_norm": 0.2366712200507046, "learning_rate": 3.0336368234699042e-05, "loss": 0.318, "num_tokens": 761355230.0, "step": 3990 }, { "epoch": 1.3621778460488139, "grad_norm": 0.21953823322753965, "learning_rate": 3.033004552352049e-05, "loss": 0.2982, "num_tokens": 761576431.0, "step": 3991 }, { "epoch": 1.3625192012288787, "grad_norm": 0.1896645356745859, "learning_rate": 3.0323722812341933e-05, "loss": 0.29, "num_tokens": 761749334.0, "step": 3992 }, { "epoch": 1.3628605564089435, "grad_norm": 0.236567972215688, "learning_rate": 3.031740010116338e-05, "loss": 0.3201, "num_tokens": 761934654.0, "step": 3993 }, { "epoch": 1.3632019115890084, "grad_norm": 0.22084440629590749, "learning_rate": 3.0311077389984827e-05, "loss": 0.3099, "num_tokens": 762127031.0, "step": 3994 }, { "epoch": 1.3635432667690732, "grad_norm": 0.22751405534911853, "learning_rate": 3.0304754678806274e-05, "loss": 0.3123, "num_tokens": 762317570.0, "step": 3995 }, { "epoch": 1.363884621949138, "grad_norm": 0.2553781410789939, "learning_rate": 3.029843196762772e-05, "loss": 0.323, "num_tokens": 762536152.0, "step": 3996 }, { "epoch": 1.364225977129203, "grad_norm": 0.21138181774304915, "learning_rate": 3.0292109256449168e-05, "loss": 0.2904, "num_tokens": 762781072.0, "step": 3997 }, { "epoch": 1.3645673323092677, "grad_norm": 0.1969885146419427, "learning_rate": 3.028578654527061e-05, "loss": 0.3468, "num_tokens": 762965573.0, "step": 3998 }, { "epoch": 1.3649086874893326, "grad_norm": 0.21767020383501387, "learning_rate": 3.027946383409206e-05, "loss": 0.3047, "num_tokens": 763160039.0, "step": 3999 }, { "epoch": 1.3652500426693974, "grad_norm": 0.2211294815326213, "learning_rate": 3.0273141122913505e-05, "loss": 0.3217, "num_tokens": 763334374.0, "step": 4000 }, { "epoch": 1.3655913978494625, "grad_norm": 0.23867220433715647, "learning_rate": 3.0266818411734956e-05, "loss": 0.3148, "num_tokens": 763536850.0, "step": 4001 }, { "epoch": 1.365932753029527, "grad_norm": 0.22202035086045, "learning_rate": 3.0260495700556403e-05, "loss": 0.3233, "num_tokens": 763737764.0, "step": 4002 }, { "epoch": 1.3662741082095922, "grad_norm": 0.22213790429459465, "learning_rate": 3.025417298937785e-05, "loss": 0.2891, "num_tokens": 763914847.0, "step": 4003 }, { "epoch": 1.366615463389657, "grad_norm": 0.20200213249485108, "learning_rate": 3.024785027819929e-05, "loss": 0.2885, "num_tokens": 764095314.0, "step": 4004 }, { "epoch": 1.3669568185697218, "grad_norm": 0.22327646578758492, "learning_rate": 3.024152756702074e-05, "loss": 0.3223, "num_tokens": 764309028.0, "step": 4005 }, { "epoch": 1.3672981737497867, "grad_norm": 0.19992336654737894, "learning_rate": 3.0235204855842187e-05, "loss": 0.2743, "num_tokens": 764469314.0, "step": 4006 }, { "epoch": 1.3676395289298515, "grad_norm": 0.22465885678926675, "learning_rate": 3.0228882144663634e-05, "loss": 0.2795, "num_tokens": 764620990.0, "step": 4007 }, { "epoch": 1.3679808841099164, "grad_norm": 0.22619276220308665, "learning_rate": 3.022255943348508e-05, "loss": 0.3343, "num_tokens": 764824562.0, "step": 4008 }, { "epoch": 1.3683222392899812, "grad_norm": 0.22472743755334065, "learning_rate": 3.0216236722306528e-05, "loss": 0.3164, "num_tokens": 765036987.0, "step": 4009 }, { "epoch": 1.368663594470046, "grad_norm": 0.19997560730540873, "learning_rate": 3.0209914011127972e-05, "loss": 0.3209, "num_tokens": 765248400.0, "step": 4010 }, { "epoch": 1.3690049496501109, "grad_norm": 0.2273553440507966, "learning_rate": 3.020359129994942e-05, "loss": 0.3272, "num_tokens": 765451111.0, "step": 4011 }, { "epoch": 1.3693463048301757, "grad_norm": 0.21750161225441597, "learning_rate": 3.0197268588770866e-05, "loss": 0.3033, "num_tokens": 765642870.0, "step": 4012 }, { "epoch": 1.3696876600102406, "grad_norm": 0.2318983721099548, "learning_rate": 3.0190945877592313e-05, "loss": 0.3065, "num_tokens": 765864916.0, "step": 4013 }, { "epoch": 1.3700290151903056, "grad_norm": 0.18709278281718628, "learning_rate": 3.018462316641376e-05, "loss": 0.3167, "num_tokens": 766085518.0, "step": 4014 }, { "epoch": 1.3703703703703702, "grad_norm": 0.23176930631961218, "learning_rate": 3.017830045523521e-05, "loss": 0.3184, "num_tokens": 766254392.0, "step": 4015 }, { "epoch": 1.3707117255504353, "grad_norm": 0.2166594564034208, "learning_rate": 3.017197774405665e-05, "loss": 0.3053, "num_tokens": 766416848.0, "step": 4016 }, { "epoch": 1.3710530807305001, "grad_norm": 0.2773490245125854, "learning_rate": 3.0165655032878097e-05, "loss": 0.3276, "num_tokens": 766590348.0, "step": 4017 }, { "epoch": 1.371394435910565, "grad_norm": 0.20731296808931643, "learning_rate": 3.0159332321699548e-05, "loss": 0.3015, "num_tokens": 766799208.0, "step": 4018 }, { "epoch": 1.3717357910906298, "grad_norm": 0.20079351194413228, "learning_rate": 3.0153009610520995e-05, "loss": 0.2975, "num_tokens": 766971983.0, "step": 4019 }, { "epoch": 1.3720771462706947, "grad_norm": 0.2538370596568072, "learning_rate": 3.014668689934244e-05, "loss": 0.3114, "num_tokens": 767154656.0, "step": 4020 }, { "epoch": 1.3724185014507595, "grad_norm": 0.2137286901253577, "learning_rate": 3.014036418816389e-05, "loss": 0.2995, "num_tokens": 767353533.0, "step": 4021 }, { "epoch": 1.3727598566308243, "grad_norm": 0.20594806436746074, "learning_rate": 3.0134041476985332e-05, "loss": 0.3235, "num_tokens": 767571026.0, "step": 4022 }, { "epoch": 1.3731012118108892, "grad_norm": 0.2218756463934315, "learning_rate": 3.012771876580678e-05, "loss": 0.3055, "num_tokens": 767745859.0, "step": 4023 }, { "epoch": 1.373442566990954, "grad_norm": 0.24132405769369206, "learning_rate": 3.0121396054628226e-05, "loss": 0.3302, "num_tokens": 767933412.0, "step": 4024 }, { "epoch": 1.373783922171019, "grad_norm": 0.22089658610113097, "learning_rate": 3.0115073343449673e-05, "loss": 0.3113, "num_tokens": 768114995.0, "step": 4025 }, { "epoch": 1.3741252773510837, "grad_norm": 0.21923077578056502, "learning_rate": 3.010875063227112e-05, "loss": 0.2995, "num_tokens": 768319966.0, "step": 4026 }, { "epoch": 1.3744666325311488, "grad_norm": 0.2395401492291959, "learning_rate": 3.0102427921092567e-05, "loss": 0.3192, "num_tokens": 768522641.0, "step": 4027 }, { "epoch": 1.3748079877112136, "grad_norm": 0.21407850684932012, "learning_rate": 3.009610520991401e-05, "loss": 0.2892, "num_tokens": 768686296.0, "step": 4028 }, { "epoch": 1.3751493428912784, "grad_norm": 0.20989197910058935, "learning_rate": 3.0089782498735458e-05, "loss": 0.3005, "num_tokens": 768915964.0, "step": 4029 }, { "epoch": 1.3754906980713433, "grad_norm": 0.21876791879888424, "learning_rate": 3.0083459787556905e-05, "loss": 0.3204, "num_tokens": 769106809.0, "step": 4030 }, { "epoch": 1.3758320532514081, "grad_norm": 0.21123884254081723, "learning_rate": 3.007713707637835e-05, "loss": 0.2794, "num_tokens": 769259471.0, "step": 4031 }, { "epoch": 1.376173408431473, "grad_norm": 0.2291837667037239, "learning_rate": 3.0070814365199802e-05, "loss": 0.324, "num_tokens": 769443578.0, "step": 4032 }, { "epoch": 1.3765147636115378, "grad_norm": 0.30732817960305275, "learning_rate": 3.006449165402125e-05, "loss": 0.2947, "num_tokens": 769628780.0, "step": 4033 }, { "epoch": 1.3768561187916026, "grad_norm": 0.23250497923512886, "learning_rate": 3.005816894284269e-05, "loss": 0.3137, "num_tokens": 769821430.0, "step": 4034 }, { "epoch": 1.3771974739716675, "grad_norm": 0.220177067881126, "learning_rate": 3.005184623166414e-05, "loss": 0.285, "num_tokens": 769994918.0, "step": 4035 }, { "epoch": 1.3775388291517323, "grad_norm": 0.224975342083467, "learning_rate": 3.0045523520485587e-05, "loss": 0.2886, "num_tokens": 770151154.0, "step": 4036 }, { "epoch": 1.3778801843317972, "grad_norm": 0.21737410184400352, "learning_rate": 3.0039200809307034e-05, "loss": 0.2977, "num_tokens": 770365659.0, "step": 4037 }, { "epoch": 1.3782215395118622, "grad_norm": 0.19265635118918287, "learning_rate": 3.003287809812848e-05, "loss": 0.3363, "num_tokens": 770625218.0, "step": 4038 }, { "epoch": 1.3785628946919268, "grad_norm": 0.20153341559901045, "learning_rate": 3.0026555386949927e-05, "loss": 0.2944, "num_tokens": 770853821.0, "step": 4039 }, { "epoch": 1.378904249871992, "grad_norm": 0.20905487032248465, "learning_rate": 3.002023267577137e-05, "loss": 0.3175, "num_tokens": 771065731.0, "step": 4040 }, { "epoch": 1.3792456050520567, "grad_norm": 0.23107422512135742, "learning_rate": 3.0013909964592818e-05, "loss": 0.3365, "num_tokens": 771273921.0, "step": 4041 }, { "epoch": 1.3795869602321216, "grad_norm": 0.20648271459579717, "learning_rate": 3.0007587253414265e-05, "loss": 0.3198, "num_tokens": 771470999.0, "step": 4042 }, { "epoch": 1.3799283154121864, "grad_norm": 0.21581903151352624, "learning_rate": 3.0001264542235712e-05, "loss": 0.3374, "num_tokens": 771702116.0, "step": 4043 }, { "epoch": 1.3802696705922513, "grad_norm": 0.22106444364912436, "learning_rate": 2.999494183105716e-05, "loss": 0.2748, "num_tokens": 771857135.0, "step": 4044 }, { "epoch": 1.380611025772316, "grad_norm": 0.23730950879218207, "learning_rate": 2.9988619119878603e-05, "loss": 0.3403, "num_tokens": 772073898.0, "step": 4045 }, { "epoch": 1.380952380952381, "grad_norm": 0.2538070883518368, "learning_rate": 2.998229640870005e-05, "loss": 0.3499, "num_tokens": 772251199.0, "step": 4046 }, { "epoch": 1.3812937361324458, "grad_norm": 0.21599353692731682, "learning_rate": 2.9975973697521497e-05, "loss": 0.3134, "num_tokens": 772443536.0, "step": 4047 }, { "epoch": 1.3816350913125106, "grad_norm": 0.2610906965783963, "learning_rate": 2.9969650986342947e-05, "loss": 0.3102, "num_tokens": 772612348.0, "step": 4048 }, { "epoch": 1.3819764464925754, "grad_norm": 0.22470993877930817, "learning_rate": 2.9963328275164394e-05, "loss": 0.3044, "num_tokens": 772848346.0, "step": 4049 }, { "epoch": 1.3823178016726403, "grad_norm": 0.21985291375519575, "learning_rate": 2.995700556398584e-05, "loss": 0.302, "num_tokens": 773079641.0, "step": 4050 }, { "epoch": 1.3826591568527054, "grad_norm": 0.2536797224074326, "learning_rate": 2.995068285280728e-05, "loss": 0.3357, "num_tokens": 773275850.0, "step": 4051 }, { "epoch": 1.38300051203277, "grad_norm": 0.21310904370328018, "learning_rate": 2.994436014162873e-05, "loss": 0.2944, "num_tokens": 773455655.0, "step": 4052 }, { "epoch": 1.383341867212835, "grad_norm": 0.21136646655304814, "learning_rate": 2.993803743045018e-05, "loss": 0.2877, "num_tokens": 773660214.0, "step": 4053 }, { "epoch": 1.3836832223928999, "grad_norm": 0.2046598761124972, "learning_rate": 2.9931714719271625e-05, "loss": 0.3248, "num_tokens": 773841863.0, "step": 4054 }, { "epoch": 1.3840245775729647, "grad_norm": 0.24860411089226545, "learning_rate": 2.9925392008093072e-05, "loss": 0.3005, "num_tokens": 774044759.0, "step": 4055 }, { "epoch": 1.3843659327530295, "grad_norm": 0.21664277651211702, "learning_rate": 2.991906929691452e-05, "loss": 0.2846, "num_tokens": 774226031.0, "step": 4056 }, { "epoch": 1.3847072879330944, "grad_norm": 0.24247236629389474, "learning_rate": 2.9912746585735963e-05, "loss": 0.302, "num_tokens": 774403093.0, "step": 4057 }, { "epoch": 1.3850486431131592, "grad_norm": 0.21727261069529463, "learning_rate": 2.990642387455741e-05, "loss": 0.3196, "num_tokens": 774603662.0, "step": 4058 }, { "epoch": 1.385389998293224, "grad_norm": 0.2311507635435123, "learning_rate": 2.9900101163378857e-05, "loss": 0.3254, "num_tokens": 774758789.0, "step": 4059 }, { "epoch": 1.385731353473289, "grad_norm": 0.2745211270170801, "learning_rate": 2.9893778452200304e-05, "loss": 0.3062, "num_tokens": 774936130.0, "step": 4060 }, { "epoch": 1.3860727086533537, "grad_norm": 0.22938625000016546, "learning_rate": 2.988745574102175e-05, "loss": 0.3185, "num_tokens": 775151598.0, "step": 4061 }, { "epoch": 1.3864140638334188, "grad_norm": 0.22551780501167068, "learning_rate": 2.98811330298432e-05, "loss": 0.2966, "num_tokens": 775326635.0, "step": 4062 }, { "epoch": 1.3867554190134834, "grad_norm": 0.23332497469895694, "learning_rate": 2.987481031866464e-05, "loss": 0.3076, "num_tokens": 775523678.0, "step": 4063 }, { "epoch": 1.3870967741935485, "grad_norm": 0.21713687551257774, "learning_rate": 2.986848760748609e-05, "loss": 0.283, "num_tokens": 775708762.0, "step": 4064 }, { "epoch": 1.3874381293736133, "grad_norm": 0.22825561362306301, "learning_rate": 2.986216489630754e-05, "loss": 0.3279, "num_tokens": 775933434.0, "step": 4065 }, { "epoch": 1.3877794845536782, "grad_norm": 0.2238714337116751, "learning_rate": 2.9855842185128986e-05, "loss": 0.3235, "num_tokens": 776099095.0, "step": 4066 }, { "epoch": 1.388120839733743, "grad_norm": 0.23877953636432614, "learning_rate": 2.9849519473950433e-05, "loss": 0.2838, "num_tokens": 776258082.0, "step": 4067 }, { "epoch": 1.3884621949138078, "grad_norm": 0.25165191842888923, "learning_rate": 2.984319676277188e-05, "loss": 0.2971, "num_tokens": 776449818.0, "step": 4068 }, { "epoch": 1.3888035500938727, "grad_norm": 0.2258124741320691, "learning_rate": 2.9836874051593323e-05, "loss": 0.3025, "num_tokens": 776632133.0, "step": 4069 }, { "epoch": 1.3891449052739375, "grad_norm": 0.2971999676557159, "learning_rate": 2.983055134041477e-05, "loss": 0.31, "num_tokens": 776835643.0, "step": 4070 }, { "epoch": 1.3894862604540024, "grad_norm": 0.2111276231131253, "learning_rate": 2.9824228629236217e-05, "loss": 0.3126, "num_tokens": 777051534.0, "step": 4071 }, { "epoch": 1.3898276156340672, "grad_norm": 0.22201365217026925, "learning_rate": 2.9817905918057664e-05, "loss": 0.3058, "num_tokens": 777253031.0, "step": 4072 }, { "epoch": 1.390168970814132, "grad_norm": 0.18830653998350935, "learning_rate": 2.981158320687911e-05, "loss": 0.3094, "num_tokens": 777421962.0, "step": 4073 }, { "epoch": 1.3905103259941969, "grad_norm": 0.23563237340148932, "learning_rate": 2.980526049570056e-05, "loss": 0.2992, "num_tokens": 777628314.0, "step": 4074 }, { "epoch": 1.390851681174262, "grad_norm": 0.22753094154010478, "learning_rate": 2.9798937784522002e-05, "loss": 0.312, "num_tokens": 777826122.0, "step": 4075 }, { "epoch": 1.3911930363543266, "grad_norm": 0.21195779296554187, "learning_rate": 2.979261507334345e-05, "loss": 0.3089, "num_tokens": 778061536.0, "step": 4076 }, { "epoch": 1.3915343915343916, "grad_norm": 0.20718750166875652, "learning_rate": 2.9786292362164896e-05, "loss": 0.2974, "num_tokens": 778237236.0, "step": 4077 }, { "epoch": 1.3918757467144565, "grad_norm": 0.19773408609720605, "learning_rate": 2.9779969650986346e-05, "loss": 0.2904, "num_tokens": 778437799.0, "step": 4078 }, { "epoch": 1.3922171018945213, "grad_norm": 0.2275676764422993, "learning_rate": 2.9773646939807793e-05, "loss": 0.3245, "num_tokens": 778669894.0, "step": 4079 }, { "epoch": 1.3925584570745861, "grad_norm": 0.2706837934138113, "learning_rate": 2.976732422862924e-05, "loss": 0.2992, "num_tokens": 778835855.0, "step": 4080 }, { "epoch": 1.392899812254651, "grad_norm": 0.21679852542580844, "learning_rate": 2.976100151745068e-05, "loss": 0.3002, "num_tokens": 779043984.0, "step": 4081 }, { "epoch": 1.3932411674347158, "grad_norm": 0.2022261007892411, "learning_rate": 2.975467880627213e-05, "loss": 0.3028, "num_tokens": 779240375.0, "step": 4082 }, { "epoch": 1.3935825226147807, "grad_norm": 0.21984806251455966, "learning_rate": 2.9748356095093578e-05, "loss": 0.3088, "num_tokens": 779414873.0, "step": 4083 }, { "epoch": 1.3939238777948455, "grad_norm": 0.2241259735448817, "learning_rate": 2.9742033383915025e-05, "loss": 0.2984, "num_tokens": 779582497.0, "step": 4084 }, { "epoch": 1.3942652329749103, "grad_norm": 0.2339970204007028, "learning_rate": 2.9735710672736472e-05, "loss": 0.2961, "num_tokens": 779754964.0, "step": 4085 }, { "epoch": 1.3946065881549752, "grad_norm": 0.23130821288379572, "learning_rate": 2.972938796155792e-05, "loss": 0.3057, "num_tokens": 779950749.0, "step": 4086 }, { "epoch": 1.39494794333504, "grad_norm": 0.20049123346046732, "learning_rate": 2.9723065250379362e-05, "loss": 0.3122, "num_tokens": 780164619.0, "step": 4087 }, { "epoch": 1.395289298515105, "grad_norm": 0.21471253642774657, "learning_rate": 2.971674253920081e-05, "loss": 0.3003, "num_tokens": 780358267.0, "step": 4088 }, { "epoch": 1.3956306536951697, "grad_norm": 0.22764367899055582, "learning_rate": 2.9710419828022256e-05, "loss": 0.2892, "num_tokens": 780555190.0, "step": 4089 }, { "epoch": 1.3959720088752348, "grad_norm": 0.2023435213416532, "learning_rate": 2.9704097116843703e-05, "loss": 0.3215, "num_tokens": 780738874.0, "step": 4090 }, { "epoch": 1.3963133640552996, "grad_norm": 0.228645616866367, "learning_rate": 2.969777440566515e-05, "loss": 0.3162, "num_tokens": 780966005.0, "step": 4091 }, { "epoch": 1.3966547192353644, "grad_norm": 0.2390348299886198, "learning_rate": 2.96914516944866e-05, "loss": 0.3222, "num_tokens": 781165939.0, "step": 4092 }, { "epoch": 1.3969960744154293, "grad_norm": 0.19426356900907887, "learning_rate": 2.968512898330804e-05, "loss": 0.3054, "num_tokens": 781374648.0, "step": 4093 }, { "epoch": 1.3973374295954941, "grad_norm": 0.2085175134943673, "learning_rate": 2.9678806272129488e-05, "loss": 0.2984, "num_tokens": 781562129.0, "step": 4094 }, { "epoch": 1.397678784775559, "grad_norm": 0.24298281101203767, "learning_rate": 2.9672483560950938e-05, "loss": 0.3252, "num_tokens": 781757492.0, "step": 4095 }, { "epoch": 1.3980201399556238, "grad_norm": 0.41842266774762704, "learning_rate": 2.9666160849772385e-05, "loss": 0.3448, "num_tokens": 781983935.0, "step": 4096 }, { "epoch": 1.3983614951356886, "grad_norm": 0.208973737468116, "learning_rate": 2.9659838138593832e-05, "loss": 0.2779, "num_tokens": 782162812.0, "step": 4097 }, { "epoch": 1.3987028503157535, "grad_norm": 0.23889162491118024, "learning_rate": 2.965351542741528e-05, "loss": 0.2837, "num_tokens": 782352110.0, "step": 4098 }, { "epoch": 1.3990442054958183, "grad_norm": 0.21843614354668914, "learning_rate": 2.9647192716236723e-05, "loss": 0.3537, "num_tokens": 782590271.0, "step": 4099 }, { "epoch": 1.3993855606758832, "grad_norm": 0.19647718617359997, "learning_rate": 2.964087000505817e-05, "loss": 0.3265, "num_tokens": 782789418.0, "step": 4100 }, { "epoch": 1.3997269158559482, "grad_norm": 0.23838453854356828, "learning_rate": 2.9634547293879617e-05, "loss": 0.3086, "num_tokens": 782976482.0, "step": 4101 }, { "epoch": 1.4000682710360128, "grad_norm": 0.21540713587142535, "learning_rate": 2.9628224582701064e-05, "loss": 0.3231, "num_tokens": 783189064.0, "step": 4102 }, { "epoch": 1.400409626216078, "grad_norm": 0.20255401327746922, "learning_rate": 2.962190187152251e-05, "loss": 0.31, "num_tokens": 783387868.0, "step": 4103 }, { "epoch": 1.4007509813961427, "grad_norm": 0.24374317227937267, "learning_rate": 2.9615579160343958e-05, "loss": 0.2967, "num_tokens": 783608879.0, "step": 4104 }, { "epoch": 1.4010923365762076, "grad_norm": 0.19507694054809677, "learning_rate": 2.96092564491654e-05, "loss": 0.3075, "num_tokens": 783798148.0, "step": 4105 }, { "epoch": 1.4014336917562724, "grad_norm": 0.22065771695374373, "learning_rate": 2.9602933737986848e-05, "loss": 0.2931, "num_tokens": 784008237.0, "step": 4106 }, { "epoch": 1.4017750469363373, "grad_norm": 0.2134440122906402, "learning_rate": 2.9596611026808295e-05, "loss": 0.3135, "num_tokens": 784227560.0, "step": 4107 }, { "epoch": 1.402116402116402, "grad_norm": 0.20637936395379158, "learning_rate": 2.9590288315629742e-05, "loss": 0.2864, "num_tokens": 784384227.0, "step": 4108 }, { "epoch": 1.402457757296467, "grad_norm": 0.2847125171828284, "learning_rate": 2.9583965604451193e-05, "loss": 0.3375, "num_tokens": 784528239.0, "step": 4109 }, { "epoch": 1.4027991124765318, "grad_norm": 0.2549112419133835, "learning_rate": 2.957764289327264e-05, "loss": 0.3065, "num_tokens": 784705434.0, "step": 4110 }, { "epoch": 1.4031404676565966, "grad_norm": 0.21612189229708326, "learning_rate": 2.957132018209408e-05, "loss": 0.3042, "num_tokens": 784928945.0, "step": 4111 }, { "epoch": 1.4034818228366617, "grad_norm": 0.23000135421480558, "learning_rate": 2.956499747091553e-05, "loss": 0.3326, "num_tokens": 785128659.0, "step": 4112 }, { "epoch": 1.4038231780167263, "grad_norm": 0.21941553564648592, "learning_rate": 2.9558674759736977e-05, "loss": 0.3068, "num_tokens": 785361790.0, "step": 4113 }, { "epoch": 1.4041645331967914, "grad_norm": 0.1946417041020637, "learning_rate": 2.9552352048558424e-05, "loss": 0.312, "num_tokens": 785552642.0, "step": 4114 }, { "epoch": 1.4045058883768562, "grad_norm": 0.21740722669178372, "learning_rate": 2.954602933737987e-05, "loss": 0.283, "num_tokens": 785739604.0, "step": 4115 }, { "epoch": 1.404847243556921, "grad_norm": 0.19998061603460823, "learning_rate": 2.9539706626201318e-05, "loss": 0.2856, "num_tokens": 785921439.0, "step": 4116 }, { "epoch": 1.4051885987369859, "grad_norm": 0.21254922942614626, "learning_rate": 2.953338391502276e-05, "loss": 0.323, "num_tokens": 786160503.0, "step": 4117 }, { "epoch": 1.4055299539170507, "grad_norm": 0.22293123229965991, "learning_rate": 2.952706120384421e-05, "loss": 0.2974, "num_tokens": 786352568.0, "step": 4118 }, { "epoch": 1.4058713090971156, "grad_norm": 0.22103716629850093, "learning_rate": 2.9520738492665656e-05, "loss": 0.2904, "num_tokens": 786572774.0, "step": 4119 }, { "epoch": 1.4062126642771804, "grad_norm": 0.22852715662852405, "learning_rate": 2.9514415781487103e-05, "loss": 0.3544, "num_tokens": 786756143.0, "step": 4120 }, { "epoch": 1.4065540194572452, "grad_norm": 0.22213109699900008, "learning_rate": 2.950809307030855e-05, "loss": 0.2921, "num_tokens": 786940497.0, "step": 4121 }, { "epoch": 1.40689537463731, "grad_norm": 0.21739900172137638, "learning_rate": 2.950177035913e-05, "loss": 0.3353, "num_tokens": 787111086.0, "step": 4122 }, { "epoch": 1.407236729817375, "grad_norm": 0.27076168640985476, "learning_rate": 2.949544764795144e-05, "loss": 0.3086, "num_tokens": 787305787.0, "step": 4123 }, { "epoch": 1.4075780849974397, "grad_norm": 0.19472013020711376, "learning_rate": 2.9489124936772887e-05, "loss": 0.3056, "num_tokens": 787506315.0, "step": 4124 }, { "epoch": 1.4079194401775048, "grad_norm": 0.20811925012607452, "learning_rate": 2.9482802225594338e-05, "loss": 0.3167, "num_tokens": 787724863.0, "step": 4125 }, { "epoch": 1.4082607953575694, "grad_norm": 0.22176750241930623, "learning_rate": 2.9476479514415785e-05, "loss": 0.3174, "num_tokens": 787912055.0, "step": 4126 }, { "epoch": 1.4086021505376345, "grad_norm": 0.21212913357111532, "learning_rate": 2.947015680323723e-05, "loss": 0.2888, "num_tokens": 788132383.0, "step": 4127 }, { "epoch": 1.4089435057176993, "grad_norm": 0.19287662121600554, "learning_rate": 2.946383409205868e-05, "loss": 0.3098, "num_tokens": 788325482.0, "step": 4128 }, { "epoch": 1.4092848608977642, "grad_norm": 0.2370736842593829, "learning_rate": 2.9457511380880122e-05, "loss": 0.2901, "num_tokens": 788536737.0, "step": 4129 }, { "epoch": 1.409626216077829, "grad_norm": 0.1975650947443542, "learning_rate": 2.945118866970157e-05, "loss": 0.3052, "num_tokens": 788715325.0, "step": 4130 }, { "epoch": 1.4099675712578938, "grad_norm": 0.2240024596489356, "learning_rate": 2.9444865958523016e-05, "loss": 0.2677, "num_tokens": 788875531.0, "step": 4131 }, { "epoch": 1.4103089264379587, "grad_norm": 0.25401330694898955, "learning_rate": 2.9438543247344463e-05, "loss": 0.3399, "num_tokens": 789046455.0, "step": 4132 }, { "epoch": 1.4106502816180235, "grad_norm": 0.2298702588474106, "learning_rate": 2.943222053616591e-05, "loss": 0.2734, "num_tokens": 789238277.0, "step": 4133 }, { "epoch": 1.4109916367980884, "grad_norm": 0.20442262998282537, "learning_rate": 2.9425897824987357e-05, "loss": 0.3052, "num_tokens": 789424800.0, "step": 4134 }, { "epoch": 1.4113329919781532, "grad_norm": 0.22253811629792486, "learning_rate": 2.94195751138088e-05, "loss": 0.3109, "num_tokens": 789614197.0, "step": 4135 }, { "epoch": 1.411674347158218, "grad_norm": 0.23243995207185356, "learning_rate": 2.9413252402630248e-05, "loss": 0.3271, "num_tokens": 789808172.0, "step": 4136 }, { "epoch": 1.4120157023382829, "grad_norm": 0.2368166577453002, "learning_rate": 2.9406929691451695e-05, "loss": 0.3272, "num_tokens": 789984648.0, "step": 4137 }, { "epoch": 1.412357057518348, "grad_norm": 0.22770373770823565, "learning_rate": 2.940060698027314e-05, "loss": 0.3079, "num_tokens": 790166471.0, "step": 4138 }, { "epoch": 1.4126984126984126, "grad_norm": 0.20753387630120698, "learning_rate": 2.9394284269094592e-05, "loss": 0.3384, "num_tokens": 790376810.0, "step": 4139 }, { "epoch": 1.4130397678784776, "grad_norm": 0.2490849160295391, "learning_rate": 2.938796155791604e-05, "loss": 0.3058, "num_tokens": 790548572.0, "step": 4140 }, { "epoch": 1.4133811230585425, "grad_norm": 0.20108463563157555, "learning_rate": 2.938163884673748e-05, "loss": 0.3081, "num_tokens": 790713313.0, "step": 4141 }, { "epoch": 1.4137224782386073, "grad_norm": 0.25809849599071655, "learning_rate": 2.937531613555893e-05, "loss": 0.2792, "num_tokens": 790867744.0, "step": 4142 }, { "epoch": 1.4140638334186721, "grad_norm": 0.24219376875534804, "learning_rate": 2.9368993424380376e-05, "loss": 0.3239, "num_tokens": 791051457.0, "step": 4143 }, { "epoch": 1.414405188598737, "grad_norm": 0.23383237925629632, "learning_rate": 2.9362670713201823e-05, "loss": 0.2866, "num_tokens": 791199286.0, "step": 4144 }, { "epoch": 1.4147465437788018, "grad_norm": 0.21455829111206523, "learning_rate": 2.935634800202327e-05, "loss": 0.32, "num_tokens": 791378619.0, "step": 4145 }, { "epoch": 1.4150878989588667, "grad_norm": 0.21834679778059732, "learning_rate": 2.9350025290844717e-05, "loss": 0.295, "num_tokens": 791567584.0, "step": 4146 }, { "epoch": 1.4154292541389315, "grad_norm": 0.23134551232878764, "learning_rate": 2.934370257966616e-05, "loss": 0.2957, "num_tokens": 791746183.0, "step": 4147 }, { "epoch": 1.4157706093189963, "grad_norm": 0.21763539795892145, "learning_rate": 2.9337379868487608e-05, "loss": 0.3003, "num_tokens": 791962480.0, "step": 4148 }, { "epoch": 1.4161119644990614, "grad_norm": 0.21325374849508, "learning_rate": 2.9331057157309055e-05, "loss": 0.3267, "num_tokens": 792188236.0, "step": 4149 }, { "epoch": 1.416453319679126, "grad_norm": 0.21036575329369767, "learning_rate": 2.9324734446130502e-05, "loss": 0.2957, "num_tokens": 792360867.0, "step": 4150 }, { "epoch": 1.416794674859191, "grad_norm": 0.2517593410102208, "learning_rate": 2.931841173495195e-05, "loss": 0.2945, "num_tokens": 792514761.0, "step": 4151 }, { "epoch": 1.417136030039256, "grad_norm": 0.2436159656804106, "learning_rate": 2.93120890237734e-05, "loss": 0.2867, "num_tokens": 792673460.0, "step": 4152 }, { "epoch": 1.4174773852193208, "grad_norm": 0.22871251103686488, "learning_rate": 2.930576631259484e-05, "loss": 0.3339, "num_tokens": 792879221.0, "step": 4153 }, { "epoch": 1.4178187403993856, "grad_norm": 0.2404635588303471, "learning_rate": 2.9299443601416287e-05, "loss": 0.34, "num_tokens": 793074511.0, "step": 4154 }, { "epoch": 1.4181600955794504, "grad_norm": 0.2439424076621909, "learning_rate": 2.9293120890237737e-05, "loss": 0.3024, "num_tokens": 793246658.0, "step": 4155 }, { "epoch": 1.4185014507595153, "grad_norm": 0.21042344236467955, "learning_rate": 2.9286798179059184e-05, "loss": 0.3213, "num_tokens": 793429282.0, "step": 4156 }, { "epoch": 1.4188428059395801, "grad_norm": 0.25364217444931364, "learning_rate": 2.928047546788063e-05, "loss": 0.2981, "num_tokens": 793594189.0, "step": 4157 }, { "epoch": 1.419184161119645, "grad_norm": 0.22868518571787888, "learning_rate": 2.9274152756702078e-05, "loss": 0.3084, "num_tokens": 793761194.0, "step": 4158 }, { "epoch": 1.4195255162997098, "grad_norm": 0.23557798840576583, "learning_rate": 2.926783004552352e-05, "loss": 0.3442, "num_tokens": 793975430.0, "step": 4159 }, { "epoch": 1.4198668714797746, "grad_norm": 0.23630656394573807, "learning_rate": 2.926150733434497e-05, "loss": 0.3388, "num_tokens": 794181649.0, "step": 4160 }, { "epoch": 1.4202082266598395, "grad_norm": 0.2281958719871812, "learning_rate": 2.9255184623166415e-05, "loss": 0.3125, "num_tokens": 794323341.0, "step": 4161 }, { "epoch": 1.4205495818399045, "grad_norm": 0.26918487567850197, "learning_rate": 2.9248861911987862e-05, "loss": 0.2628, "num_tokens": 794447545.0, "step": 4162 }, { "epoch": 1.4208909370199692, "grad_norm": 0.23690029909332652, "learning_rate": 2.924253920080931e-05, "loss": 0.3116, "num_tokens": 794650577.0, "step": 4163 }, { "epoch": 1.4212322922000342, "grad_norm": 0.264388196394246, "learning_rate": 2.9236216489630756e-05, "loss": 0.3136, "num_tokens": 794861647.0, "step": 4164 }, { "epoch": 1.421573647380099, "grad_norm": 0.21184063459764166, "learning_rate": 2.92298937784522e-05, "loss": 0.2947, "num_tokens": 795043156.0, "step": 4165 }, { "epoch": 1.421915002560164, "grad_norm": 0.2136243006442994, "learning_rate": 2.9223571067273647e-05, "loss": 0.3049, "num_tokens": 795256061.0, "step": 4166 }, { "epoch": 1.4222563577402287, "grad_norm": 0.2503577237883329, "learning_rate": 2.9217248356095094e-05, "loss": 0.3114, "num_tokens": 795441775.0, "step": 4167 }, { "epoch": 1.4225977129202936, "grad_norm": 0.23182070350413747, "learning_rate": 2.921092564491654e-05, "loss": 0.3286, "num_tokens": 795624096.0, "step": 4168 }, { "epoch": 1.4229390681003584, "grad_norm": 0.23095575472014943, "learning_rate": 2.920460293373799e-05, "loss": 0.3199, "num_tokens": 795835589.0, "step": 4169 }, { "epoch": 1.4232804232804233, "grad_norm": 0.23951400877954088, "learning_rate": 2.9198280222559438e-05, "loss": 0.3136, "num_tokens": 796024152.0, "step": 4170 }, { "epoch": 1.423621778460488, "grad_norm": 0.2114531307708859, "learning_rate": 2.919195751138088e-05, "loss": 0.3122, "num_tokens": 796209495.0, "step": 4171 }, { "epoch": 1.423963133640553, "grad_norm": 0.21848532134731138, "learning_rate": 2.918563480020233e-05, "loss": 0.2724, "num_tokens": 796415317.0, "step": 4172 }, { "epoch": 1.4243044888206178, "grad_norm": 0.2051310106236042, "learning_rate": 2.9179312089023776e-05, "loss": 0.2977, "num_tokens": 796607927.0, "step": 4173 }, { "epoch": 1.4246458440006826, "grad_norm": 0.24665175426464248, "learning_rate": 2.9172989377845223e-05, "loss": 0.3234, "num_tokens": 796808748.0, "step": 4174 }, { "epoch": 1.4249871991807477, "grad_norm": 0.20261637945814637, "learning_rate": 2.916666666666667e-05, "loss": 0.3248, "num_tokens": 797000096.0, "step": 4175 }, { "epoch": 1.4253285543608123, "grad_norm": 0.2539932914549366, "learning_rate": 2.9160343955488113e-05, "loss": 0.3023, "num_tokens": 797194624.0, "step": 4176 }, { "epoch": 1.4256699095408774, "grad_norm": 0.20143307187617798, "learning_rate": 2.915402124430956e-05, "loss": 0.322, "num_tokens": 797408307.0, "step": 4177 }, { "epoch": 1.4260112647209422, "grad_norm": 0.23719781327721498, "learning_rate": 2.9147698533131007e-05, "loss": 0.2798, "num_tokens": 797594717.0, "step": 4178 }, { "epoch": 1.426352619901007, "grad_norm": 0.18708685108324308, "learning_rate": 2.9141375821952454e-05, "loss": 0.2763, "num_tokens": 797797592.0, "step": 4179 }, { "epoch": 1.4266939750810719, "grad_norm": 0.23695658833681318, "learning_rate": 2.91350531107739e-05, "loss": 0.3031, "num_tokens": 797997331.0, "step": 4180 }, { "epoch": 1.4270353302611367, "grad_norm": 0.20195128385341068, "learning_rate": 2.9128730399595348e-05, "loss": 0.2978, "num_tokens": 798206165.0, "step": 4181 }, { "epoch": 1.4273766854412016, "grad_norm": 0.2146765632813288, "learning_rate": 2.9122407688416792e-05, "loss": 0.2865, "num_tokens": 798395982.0, "step": 4182 }, { "epoch": 1.4277180406212664, "grad_norm": 0.2048535324039108, "learning_rate": 2.911608497723824e-05, "loss": 0.3071, "num_tokens": 798612614.0, "step": 4183 }, { "epoch": 1.4280593958013312, "grad_norm": 0.20827492625084013, "learning_rate": 2.9109762266059686e-05, "loss": 0.3037, "num_tokens": 798800200.0, "step": 4184 }, { "epoch": 1.428400750981396, "grad_norm": 0.22137503432168873, "learning_rate": 2.9103439554881133e-05, "loss": 0.319, "num_tokens": 799015992.0, "step": 4185 }, { "epoch": 1.4287421061614611, "grad_norm": 0.23728383694310579, "learning_rate": 2.9097116843702583e-05, "loss": 0.3011, "num_tokens": 799196494.0, "step": 4186 }, { "epoch": 1.4290834613415258, "grad_norm": 0.2090623325539929, "learning_rate": 2.909079413252403e-05, "loss": 0.2974, "num_tokens": 799368637.0, "step": 4187 }, { "epoch": 1.4294248165215908, "grad_norm": 0.23314471683251586, "learning_rate": 2.908447142134547e-05, "loss": 0.3185, "num_tokens": 799553729.0, "step": 4188 }, { "epoch": 1.4297661717016557, "grad_norm": 0.19413583165442033, "learning_rate": 2.907814871016692e-05, "loss": 0.3082, "num_tokens": 799780624.0, "step": 4189 }, { "epoch": 1.4301075268817205, "grad_norm": 0.2293502323220682, "learning_rate": 2.9071825998988368e-05, "loss": 0.2802, "num_tokens": 799988376.0, "step": 4190 }, { "epoch": 1.4304488820617853, "grad_norm": 0.2261285625273756, "learning_rate": 2.9065503287809815e-05, "loss": 0.3096, "num_tokens": 800173797.0, "step": 4191 }, { "epoch": 1.4307902372418502, "grad_norm": 0.30558478700461933, "learning_rate": 2.9059180576631262e-05, "loss": 0.3513, "num_tokens": 800385881.0, "step": 4192 }, { "epoch": 1.431131592421915, "grad_norm": 0.22708991950853225, "learning_rate": 2.905285786545271e-05, "loss": 0.2915, "num_tokens": 800577204.0, "step": 4193 }, { "epoch": 1.4314729476019799, "grad_norm": 0.20216009159899687, "learning_rate": 2.9046535154274152e-05, "loss": 0.2992, "num_tokens": 800762567.0, "step": 4194 }, { "epoch": 1.4318143027820447, "grad_norm": 0.350161525462981, "learning_rate": 2.90402124430956e-05, "loss": 0.2669, "num_tokens": 800921624.0, "step": 4195 }, { "epoch": 1.4321556579621095, "grad_norm": 0.219389298038316, "learning_rate": 2.9033889731917046e-05, "loss": 0.3016, "num_tokens": 801118508.0, "step": 4196 }, { "epoch": 1.4324970131421744, "grad_norm": 0.19024301792387532, "learning_rate": 2.9027567020738493e-05, "loss": 0.3069, "num_tokens": 801325432.0, "step": 4197 }, { "epoch": 1.4328383683222392, "grad_norm": 0.23570004352249288, "learning_rate": 2.902124430955994e-05, "loss": 0.2811, "num_tokens": 801505456.0, "step": 4198 }, { "epoch": 1.4331797235023043, "grad_norm": 0.2335335147027213, "learning_rate": 2.901492159838139e-05, "loss": 0.3135, "num_tokens": 801728101.0, "step": 4199 }, { "epoch": 1.4335210786823689, "grad_norm": 0.19915831482881685, "learning_rate": 2.900859888720283e-05, "loss": 0.2934, "num_tokens": 801933554.0, "step": 4200 }, { "epoch": 1.433862433862434, "grad_norm": 0.23225560849892798, "learning_rate": 2.9002276176024278e-05, "loss": 0.2809, "num_tokens": 802137637.0, "step": 4201 }, { "epoch": 1.4342037890424988, "grad_norm": 0.22101173224799683, "learning_rate": 2.8995953464845728e-05, "loss": 0.3312, "num_tokens": 802343991.0, "step": 4202 }, { "epoch": 1.4345451442225636, "grad_norm": 0.24239015921857052, "learning_rate": 2.8989630753667175e-05, "loss": 0.3078, "num_tokens": 802527377.0, "step": 4203 }, { "epoch": 1.4348864994026285, "grad_norm": 0.18046048381144367, "learning_rate": 2.8983308042488622e-05, "loss": 0.3069, "num_tokens": 802783861.0, "step": 4204 }, { "epoch": 1.4352278545826933, "grad_norm": 0.2229121915120426, "learning_rate": 2.897698533131007e-05, "loss": 0.2681, "num_tokens": 803006078.0, "step": 4205 }, { "epoch": 1.4355692097627581, "grad_norm": 0.1902937298653273, "learning_rate": 2.8970662620131513e-05, "loss": 0.3137, "num_tokens": 803234422.0, "step": 4206 }, { "epoch": 1.435910564942823, "grad_norm": 0.22242438992185656, "learning_rate": 2.896433990895296e-05, "loss": 0.3274, "num_tokens": 803449380.0, "step": 4207 }, { "epoch": 1.4362519201228878, "grad_norm": 0.20904198104434252, "learning_rate": 2.8958017197774407e-05, "loss": 0.308, "num_tokens": 803621411.0, "step": 4208 }, { "epoch": 1.4365932753029527, "grad_norm": 0.24837551212876177, "learning_rate": 2.8951694486595854e-05, "loss": 0.316, "num_tokens": 803820478.0, "step": 4209 }, { "epoch": 1.4369346304830175, "grad_norm": 0.21503712262878627, "learning_rate": 2.89453717754173e-05, "loss": 0.3061, "num_tokens": 804012965.0, "step": 4210 }, { "epoch": 1.4372759856630823, "grad_norm": 0.2254615912051297, "learning_rate": 2.8939049064238748e-05, "loss": 0.341, "num_tokens": 804220504.0, "step": 4211 }, { "epoch": 1.4376173408431474, "grad_norm": 0.2324580245291344, "learning_rate": 2.893272635306019e-05, "loss": 0.34, "num_tokens": 804428430.0, "step": 4212 }, { "epoch": 1.437958696023212, "grad_norm": 0.2163919812704637, "learning_rate": 2.8926403641881638e-05, "loss": 0.2899, "num_tokens": 804597735.0, "step": 4213 }, { "epoch": 1.438300051203277, "grad_norm": 0.24141938332205076, "learning_rate": 2.8920080930703085e-05, "loss": 0.3152, "num_tokens": 804763041.0, "step": 4214 }, { "epoch": 1.438641406383342, "grad_norm": 0.22971319854955938, "learning_rate": 2.8913758219524532e-05, "loss": 0.2863, "num_tokens": 804946609.0, "step": 4215 }, { "epoch": 1.4389827615634068, "grad_norm": 0.2164215654633723, "learning_rate": 2.8907435508345983e-05, "loss": 0.2989, "num_tokens": 805107620.0, "step": 4216 }, { "epoch": 1.4393241167434716, "grad_norm": 0.2278093793720779, "learning_rate": 2.890111279716743e-05, "loss": 0.3278, "num_tokens": 805303442.0, "step": 4217 }, { "epoch": 1.4396654719235364, "grad_norm": 0.25563386245857145, "learning_rate": 2.889479008598887e-05, "loss": 0.31, "num_tokens": 805484231.0, "step": 4218 }, { "epoch": 1.4400068271036013, "grad_norm": 0.21721266134973324, "learning_rate": 2.888846737481032e-05, "loss": 0.2747, "num_tokens": 805672262.0, "step": 4219 }, { "epoch": 1.4403481822836661, "grad_norm": 0.22072062826530175, "learning_rate": 2.8882144663631767e-05, "loss": 0.304, "num_tokens": 805869997.0, "step": 4220 }, { "epoch": 1.440689537463731, "grad_norm": 0.21581833047195928, "learning_rate": 2.8875821952453214e-05, "loss": 0.3313, "num_tokens": 806033891.0, "step": 4221 }, { "epoch": 1.4410308926437958, "grad_norm": 0.2624800145805712, "learning_rate": 2.886949924127466e-05, "loss": 0.3235, "num_tokens": 806196499.0, "step": 4222 }, { "epoch": 1.4413722478238609, "grad_norm": 0.19470541210829084, "learning_rate": 2.8863176530096108e-05, "loss": 0.3058, "num_tokens": 806394738.0, "step": 4223 }, { "epoch": 1.4417136030039255, "grad_norm": 0.23173152567718427, "learning_rate": 2.885685381891755e-05, "loss": 0.3097, "num_tokens": 806570732.0, "step": 4224 }, { "epoch": 1.4420549581839905, "grad_norm": 0.22708170154314447, "learning_rate": 2.8850531107739e-05, "loss": 0.3142, "num_tokens": 806760017.0, "step": 4225 }, { "epoch": 1.4423963133640554, "grad_norm": 0.22434318852567786, "learning_rate": 2.8844208396560446e-05, "loss": 0.3144, "num_tokens": 806946196.0, "step": 4226 }, { "epoch": 1.4427376685441202, "grad_norm": 0.19734268280157505, "learning_rate": 2.8837885685381893e-05, "loss": 0.316, "num_tokens": 807171260.0, "step": 4227 }, { "epoch": 1.443079023724185, "grad_norm": 0.2171001624698041, "learning_rate": 2.883156297420334e-05, "loss": 0.2995, "num_tokens": 807377791.0, "step": 4228 }, { "epoch": 1.44342037890425, "grad_norm": 0.21648891478395416, "learning_rate": 2.882524026302479e-05, "loss": 0.3318, "num_tokens": 807599894.0, "step": 4229 }, { "epoch": 1.4437617340843147, "grad_norm": 0.2286580486236493, "learning_rate": 2.881891755184623e-05, "loss": 0.3102, "num_tokens": 807768736.0, "step": 4230 }, { "epoch": 1.4441030892643796, "grad_norm": 0.22447295081934324, "learning_rate": 2.8812594840667677e-05, "loss": 0.2787, "num_tokens": 807938679.0, "step": 4231 }, { "epoch": 1.4444444444444444, "grad_norm": 0.23780747241650566, "learning_rate": 2.8806272129489127e-05, "loss": 0.306, "num_tokens": 808137908.0, "step": 4232 }, { "epoch": 1.4447857996245093, "grad_norm": 0.20080622543054025, "learning_rate": 2.8799949418310574e-05, "loss": 0.3083, "num_tokens": 808362054.0, "step": 4233 }, { "epoch": 1.445127154804574, "grad_norm": 0.201272524531404, "learning_rate": 2.879362670713202e-05, "loss": 0.2764, "num_tokens": 808550906.0, "step": 4234 }, { "epoch": 1.445468509984639, "grad_norm": 0.2160298775981571, "learning_rate": 2.878730399595347e-05, "loss": 0.3006, "num_tokens": 808734336.0, "step": 4235 }, { "epoch": 1.445809865164704, "grad_norm": 0.2556830429365663, "learning_rate": 2.8780981284774912e-05, "loss": 0.3137, "num_tokens": 808937998.0, "step": 4236 }, { "epoch": 1.4461512203447686, "grad_norm": 0.1872394539219938, "learning_rate": 2.877465857359636e-05, "loss": 0.3218, "num_tokens": 809120378.0, "step": 4237 }, { "epoch": 1.4464925755248337, "grad_norm": 0.24840735572895264, "learning_rate": 2.8768335862417806e-05, "loss": 0.3112, "num_tokens": 809310955.0, "step": 4238 }, { "epoch": 1.4468339307048985, "grad_norm": 0.21700327511832113, "learning_rate": 2.8762013151239253e-05, "loss": 0.3162, "num_tokens": 809516749.0, "step": 4239 }, { "epoch": 1.4471752858849634, "grad_norm": 0.19602122370945058, "learning_rate": 2.87556904400607e-05, "loss": 0.3257, "num_tokens": 809693450.0, "step": 4240 }, { "epoch": 1.4475166410650282, "grad_norm": 0.2782385789476735, "learning_rate": 2.8749367728882147e-05, "loss": 0.3194, "num_tokens": 809868311.0, "step": 4241 }, { "epoch": 1.447857996245093, "grad_norm": 0.19777483671143703, "learning_rate": 2.874304501770359e-05, "loss": 0.3175, "num_tokens": 810060977.0, "step": 4242 }, { "epoch": 1.4481993514251579, "grad_norm": 0.3301874984235815, "learning_rate": 2.8736722306525038e-05, "loss": 0.2825, "num_tokens": 810261620.0, "step": 4243 }, { "epoch": 1.4485407066052227, "grad_norm": 0.2002938572213423, "learning_rate": 2.8730399595346484e-05, "loss": 0.2873, "num_tokens": 810448292.0, "step": 4244 }, { "epoch": 1.4488820617852876, "grad_norm": 0.19379722287722062, "learning_rate": 2.872407688416793e-05, "loss": 0.3174, "num_tokens": 810675885.0, "step": 4245 }, { "epoch": 1.4492234169653524, "grad_norm": 0.2378400624270848, "learning_rate": 2.8717754172989382e-05, "loss": 0.3103, "num_tokens": 810864771.0, "step": 4246 }, { "epoch": 1.4495647721454172, "grad_norm": 0.2102448714407642, "learning_rate": 2.871143146181083e-05, "loss": 0.3087, "num_tokens": 811044263.0, "step": 4247 }, { "epoch": 1.449906127325482, "grad_norm": 0.21342261962038717, "learning_rate": 2.870510875063227e-05, "loss": 0.2942, "num_tokens": 811238403.0, "step": 4248 }, { "epoch": 1.4502474825055471, "grad_norm": 0.2223669260038357, "learning_rate": 2.869878603945372e-05, "loss": 0.2731, "num_tokens": 811417061.0, "step": 4249 }, { "epoch": 1.4505888376856118, "grad_norm": 0.25393580739408167, "learning_rate": 2.8692463328275166e-05, "loss": 0.3055, "num_tokens": 811618248.0, "step": 4250 }, { "epoch": 1.4509301928656768, "grad_norm": 0.20576187554275124, "learning_rate": 2.8686140617096613e-05, "loss": 0.3002, "num_tokens": 811817645.0, "step": 4251 }, { "epoch": 1.4512715480457417, "grad_norm": 0.20840991325911176, "learning_rate": 2.867981790591806e-05, "loss": 0.2963, "num_tokens": 811989189.0, "step": 4252 }, { "epoch": 1.4516129032258065, "grad_norm": 0.21865477786890894, "learning_rate": 2.8673495194739507e-05, "loss": 0.307, "num_tokens": 812181957.0, "step": 4253 }, { "epoch": 1.4519542584058713, "grad_norm": 0.27062381842268757, "learning_rate": 2.866717248356095e-05, "loss": 0.3119, "num_tokens": 812364902.0, "step": 4254 }, { "epoch": 1.4522956135859362, "grad_norm": 0.19778624813794965, "learning_rate": 2.8660849772382398e-05, "loss": 0.2991, "num_tokens": 812570423.0, "step": 4255 }, { "epoch": 1.452636968766001, "grad_norm": 0.3456431412469747, "learning_rate": 2.8654527061203845e-05, "loss": 0.3258, "num_tokens": 812747412.0, "step": 4256 }, { "epoch": 1.4529783239460659, "grad_norm": 0.22865293693394, "learning_rate": 2.8648204350025292e-05, "loss": 0.293, "num_tokens": 812940108.0, "step": 4257 }, { "epoch": 1.4533196791261307, "grad_norm": 0.23038768205017798, "learning_rate": 2.864188163884674e-05, "loss": 0.3173, "num_tokens": 813110896.0, "step": 4258 }, { "epoch": 1.4536610343061955, "grad_norm": 0.21689286729328358, "learning_rate": 2.863555892766819e-05, "loss": 0.3206, "num_tokens": 813326067.0, "step": 4259 }, { "epoch": 1.4540023894862604, "grad_norm": 0.21223396146533163, "learning_rate": 2.862923621648963e-05, "loss": 0.3075, "num_tokens": 813513712.0, "step": 4260 }, { "epoch": 1.4543437446663252, "grad_norm": 0.2335187000222825, "learning_rate": 2.8622913505311076e-05, "loss": 0.3103, "num_tokens": 813697826.0, "step": 4261 }, { "epoch": 1.4546850998463903, "grad_norm": 0.22900297196632546, "learning_rate": 2.8616590794132523e-05, "loss": 0.2648, "num_tokens": 813872770.0, "step": 4262 }, { "epoch": 1.455026455026455, "grad_norm": 0.2415287116185243, "learning_rate": 2.8610268082953974e-05, "loss": 0.2711, "num_tokens": 814021978.0, "step": 4263 }, { "epoch": 1.45536781020652, "grad_norm": 0.23423007489829234, "learning_rate": 2.860394537177542e-05, "loss": 0.301, "num_tokens": 814222809.0, "step": 4264 }, { "epoch": 1.4557091653865848, "grad_norm": 0.17724914466975525, "learning_rate": 2.8597622660596868e-05, "loss": 0.2984, "num_tokens": 814401551.0, "step": 4265 }, { "epoch": 1.4560505205666496, "grad_norm": 0.25657232823879517, "learning_rate": 2.859129994941831e-05, "loss": 0.3218, "num_tokens": 814620016.0, "step": 4266 }, { "epoch": 1.4563918757467145, "grad_norm": 0.23426506845274978, "learning_rate": 2.858497723823976e-05, "loss": 0.3138, "num_tokens": 814811652.0, "step": 4267 }, { "epoch": 1.4567332309267793, "grad_norm": 0.19608313519662024, "learning_rate": 2.8578654527061205e-05, "loss": 0.3676, "num_tokens": 815063670.0, "step": 4268 }, { "epoch": 1.4570745861068441, "grad_norm": 0.25020535878477695, "learning_rate": 2.8572331815882652e-05, "loss": 0.2913, "num_tokens": 815241494.0, "step": 4269 }, { "epoch": 1.457415941286909, "grad_norm": 0.2183038416912101, "learning_rate": 2.85660091047041e-05, "loss": 0.3075, "num_tokens": 815439803.0, "step": 4270 }, { "epoch": 1.4577572964669738, "grad_norm": 0.19297413388693305, "learning_rate": 2.8559686393525546e-05, "loss": 0.3127, "num_tokens": 815603373.0, "step": 4271 }, { "epoch": 1.4580986516470387, "grad_norm": 0.2462454037601782, "learning_rate": 2.855336368234699e-05, "loss": 0.2814, "num_tokens": 815767571.0, "step": 4272 }, { "epoch": 1.4584400068271037, "grad_norm": 0.2210483877601591, "learning_rate": 2.8547040971168437e-05, "loss": 0.3404, "num_tokens": 815967607.0, "step": 4273 }, { "epoch": 1.4587813620071683, "grad_norm": 0.19345223253805383, "learning_rate": 2.8540718259989884e-05, "loss": 0.3073, "num_tokens": 816171772.0, "step": 4274 }, { "epoch": 1.4591227171872334, "grad_norm": 0.23759574227316657, "learning_rate": 2.853439554881133e-05, "loss": 0.2981, "num_tokens": 816339376.0, "step": 4275 }, { "epoch": 1.4594640723672982, "grad_norm": 0.23761360145146604, "learning_rate": 2.852807283763278e-05, "loss": 0.2927, "num_tokens": 816509932.0, "step": 4276 }, { "epoch": 1.459805427547363, "grad_norm": 0.2214634715825088, "learning_rate": 2.8521750126454228e-05, "loss": 0.3139, "num_tokens": 816690653.0, "step": 4277 }, { "epoch": 1.460146782727428, "grad_norm": 0.2348359697963079, "learning_rate": 2.851542741527567e-05, "loss": 0.3184, "num_tokens": 816913390.0, "step": 4278 }, { "epoch": 1.4604881379074928, "grad_norm": 0.20683652895558594, "learning_rate": 2.850910470409712e-05, "loss": 0.3193, "num_tokens": 817108839.0, "step": 4279 }, { "epoch": 1.4608294930875576, "grad_norm": 0.21826468971220667, "learning_rate": 2.8502781992918566e-05, "loss": 0.317, "num_tokens": 817268368.0, "step": 4280 }, { "epoch": 1.4611708482676224, "grad_norm": 0.2667397125197888, "learning_rate": 2.8496459281740013e-05, "loss": 0.3251, "num_tokens": 817467353.0, "step": 4281 }, { "epoch": 1.4615122034476873, "grad_norm": 0.17563715184901305, "learning_rate": 2.849013657056146e-05, "loss": 0.2995, "num_tokens": 817662080.0, "step": 4282 }, { "epoch": 1.4618535586277521, "grad_norm": 0.20662816301915207, "learning_rate": 2.8483813859382907e-05, "loss": 0.3103, "num_tokens": 817856941.0, "step": 4283 }, { "epoch": 1.462194913807817, "grad_norm": 0.21683249433726665, "learning_rate": 2.847749114820435e-05, "loss": 0.2852, "num_tokens": 818035929.0, "step": 4284 }, { "epoch": 1.4625362689878818, "grad_norm": 0.25798498745278503, "learning_rate": 2.8471168437025797e-05, "loss": 0.2876, "num_tokens": 818188931.0, "step": 4285 }, { "epoch": 1.4628776241679469, "grad_norm": 0.1986554023226999, "learning_rate": 2.8464845725847244e-05, "loss": 0.2741, "num_tokens": 818380303.0, "step": 4286 }, { "epoch": 1.4632189793480115, "grad_norm": 0.20916490207778626, "learning_rate": 2.845852301466869e-05, "loss": 0.2871, "num_tokens": 818572114.0, "step": 4287 }, { "epoch": 1.4635603345280765, "grad_norm": 0.20616744176136004, "learning_rate": 2.8452200303490138e-05, "loss": 0.276, "num_tokens": 818773898.0, "step": 4288 }, { "epoch": 1.4639016897081414, "grad_norm": 0.21709599141130803, "learning_rate": 2.844587759231159e-05, "loss": 0.3233, "num_tokens": 818949588.0, "step": 4289 }, { "epoch": 1.4642430448882062, "grad_norm": 0.22629026119342016, "learning_rate": 2.843955488113303e-05, "loss": 0.2795, "num_tokens": 819134021.0, "step": 4290 }, { "epoch": 1.464584400068271, "grad_norm": 0.1944332793559937, "learning_rate": 2.8433232169954476e-05, "loss": 0.3126, "num_tokens": 819341947.0, "step": 4291 }, { "epoch": 1.464925755248336, "grad_norm": 0.22569075266112612, "learning_rate": 2.8426909458775923e-05, "loss": 0.3051, "num_tokens": 819547920.0, "step": 4292 }, { "epoch": 1.4652671104284007, "grad_norm": 0.22001484107377084, "learning_rate": 2.8420586747597373e-05, "loss": 0.2802, "num_tokens": 819749564.0, "step": 4293 }, { "epoch": 1.4656084656084656, "grad_norm": 0.22152706261643323, "learning_rate": 2.841426403641882e-05, "loss": 0.3256, "num_tokens": 819957006.0, "step": 4294 }, { "epoch": 1.4659498207885304, "grad_norm": 0.23954879796138306, "learning_rate": 2.8407941325240267e-05, "loss": 0.3403, "num_tokens": 820145352.0, "step": 4295 }, { "epoch": 1.4662911759685953, "grad_norm": 0.22967597765686007, "learning_rate": 2.840161861406171e-05, "loss": 0.336, "num_tokens": 820356188.0, "step": 4296 }, { "epoch": 1.46663253114866, "grad_norm": 0.22424443792405258, "learning_rate": 2.8395295902883158e-05, "loss": 0.3234, "num_tokens": 820566697.0, "step": 4297 }, { "epoch": 1.466973886328725, "grad_norm": 0.19072659864905048, "learning_rate": 2.8388973191704605e-05, "loss": 0.3191, "num_tokens": 820780231.0, "step": 4298 }, { "epoch": 1.46731524150879, "grad_norm": 0.27019533570119814, "learning_rate": 2.838265048052605e-05, "loss": 0.3133, "num_tokens": 820961727.0, "step": 4299 }, { "epoch": 1.4676565966888546, "grad_norm": 0.20729533591458604, "learning_rate": 2.83763277693475e-05, "loss": 0.2753, "num_tokens": 821143066.0, "step": 4300 }, { "epoch": 1.4679979518689197, "grad_norm": 0.1997945155036746, "learning_rate": 2.8370005058168946e-05, "loss": 0.2989, "num_tokens": 821341912.0, "step": 4301 }, { "epoch": 1.4683393070489845, "grad_norm": 0.22300456290627904, "learning_rate": 2.836368234699039e-05, "loss": 0.3264, "num_tokens": 821522250.0, "step": 4302 }, { "epoch": 1.4686806622290494, "grad_norm": 0.2777266205140742, "learning_rate": 2.8357359635811836e-05, "loss": 0.3442, "num_tokens": 821690928.0, "step": 4303 }, { "epoch": 1.4690220174091142, "grad_norm": 0.2075962562835024, "learning_rate": 2.8351036924633283e-05, "loss": 0.3102, "num_tokens": 821906919.0, "step": 4304 }, { "epoch": 1.469363372589179, "grad_norm": 0.2229137740608309, "learning_rate": 2.834471421345473e-05, "loss": 0.326, "num_tokens": 822080168.0, "step": 4305 }, { "epoch": 1.4697047277692439, "grad_norm": 0.2103406164886041, "learning_rate": 2.833839150227618e-05, "loss": 0.327, "num_tokens": 822292371.0, "step": 4306 }, { "epoch": 1.4700460829493087, "grad_norm": 0.20810636135950092, "learning_rate": 2.833206879109762e-05, "loss": 0.2811, "num_tokens": 822459776.0, "step": 4307 }, { "epoch": 1.4703874381293736, "grad_norm": 0.2030078709064102, "learning_rate": 2.8325746079919068e-05, "loss": 0.3077, "num_tokens": 822644136.0, "step": 4308 }, { "epoch": 1.4707287933094384, "grad_norm": 0.24158276398898648, "learning_rate": 2.8319423368740518e-05, "loss": 0.289, "num_tokens": 822809623.0, "step": 4309 }, { "epoch": 1.4710701484895035, "grad_norm": 0.20995973227397152, "learning_rate": 2.8313100657561965e-05, "loss": 0.3004, "num_tokens": 822988722.0, "step": 4310 }, { "epoch": 1.471411503669568, "grad_norm": 0.25392192371622657, "learning_rate": 2.8306777946383412e-05, "loss": 0.3321, "num_tokens": 823207828.0, "step": 4311 }, { "epoch": 1.4717528588496331, "grad_norm": 0.2571267077805252, "learning_rate": 2.830045523520486e-05, "loss": 0.3087, "num_tokens": 823363231.0, "step": 4312 }, { "epoch": 1.472094214029698, "grad_norm": 0.2125574443239691, "learning_rate": 2.8294132524026303e-05, "loss": 0.2842, "num_tokens": 823544402.0, "step": 4313 }, { "epoch": 1.4724355692097628, "grad_norm": 0.22672614312356723, "learning_rate": 2.828780981284775e-05, "loss": 0.3002, "num_tokens": 823706363.0, "step": 4314 }, { "epoch": 1.4727769243898277, "grad_norm": 0.23634458206776926, "learning_rate": 2.8281487101669197e-05, "loss": 0.2997, "num_tokens": 823918459.0, "step": 4315 }, { "epoch": 1.4731182795698925, "grad_norm": 0.22564470887691146, "learning_rate": 2.8275164390490644e-05, "loss": 0.3082, "num_tokens": 824100965.0, "step": 4316 }, { "epoch": 1.4734596347499573, "grad_norm": 0.21590044582214318, "learning_rate": 2.826884167931209e-05, "loss": 0.3152, "num_tokens": 824265343.0, "step": 4317 }, { "epoch": 1.4738009899300222, "grad_norm": 0.23621388492753959, "learning_rate": 2.8262518968133538e-05, "loss": 0.2923, "num_tokens": 824441930.0, "step": 4318 }, { "epoch": 1.474142345110087, "grad_norm": 0.20648876658950757, "learning_rate": 2.825619625695498e-05, "loss": 0.303, "num_tokens": 824599123.0, "step": 4319 }, { "epoch": 1.4744837002901519, "grad_norm": 0.23825326831678628, "learning_rate": 2.8249873545776428e-05, "loss": 0.3068, "num_tokens": 824807046.0, "step": 4320 }, { "epoch": 1.4748250554702167, "grad_norm": 0.22055065374502797, "learning_rate": 2.8243550834597875e-05, "loss": 0.3187, "num_tokens": 825009785.0, "step": 4321 }, { "epoch": 1.4751664106502815, "grad_norm": 0.2279885883905479, "learning_rate": 2.8237228123419322e-05, "loss": 0.2893, "num_tokens": 825212521.0, "step": 4322 }, { "epoch": 1.4755077658303466, "grad_norm": 0.2293679123833798, "learning_rate": 2.8230905412240772e-05, "loss": 0.3624, "num_tokens": 825408251.0, "step": 4323 }, { "epoch": 1.4758491210104112, "grad_norm": 0.2129282389976253, "learning_rate": 2.822458270106222e-05, "loss": 0.2992, "num_tokens": 825606665.0, "step": 4324 }, { "epoch": 1.4761904761904763, "grad_norm": 0.23897778527857277, "learning_rate": 2.821825998988366e-05, "loss": 0.307, "num_tokens": 825798018.0, "step": 4325 }, { "epoch": 1.4765318313705411, "grad_norm": 0.2177004234036593, "learning_rate": 2.821193727870511e-05, "loss": 0.2963, "num_tokens": 825973846.0, "step": 4326 }, { "epoch": 1.476873186550606, "grad_norm": 0.2051447939576401, "learning_rate": 2.8205614567526557e-05, "loss": 0.2969, "num_tokens": 826161857.0, "step": 4327 }, { "epoch": 1.4772145417306708, "grad_norm": 0.2397623666656396, "learning_rate": 2.8199291856348004e-05, "loss": 0.3123, "num_tokens": 826319398.0, "step": 4328 }, { "epoch": 1.4775558969107356, "grad_norm": 0.22512082150151475, "learning_rate": 2.819296914516945e-05, "loss": 0.2964, "num_tokens": 826494136.0, "step": 4329 }, { "epoch": 1.4778972520908005, "grad_norm": 0.23212680946665964, "learning_rate": 2.8186646433990898e-05, "loss": 0.2895, "num_tokens": 826701134.0, "step": 4330 }, { "epoch": 1.4782386072708653, "grad_norm": 0.20516923985799934, "learning_rate": 2.818032372281234e-05, "loss": 0.3114, "num_tokens": 826904792.0, "step": 4331 }, { "epoch": 1.4785799624509302, "grad_norm": 0.20907200638973378, "learning_rate": 2.817400101163379e-05, "loss": 0.2867, "num_tokens": 827084418.0, "step": 4332 }, { "epoch": 1.478921317630995, "grad_norm": 0.2085092949475117, "learning_rate": 2.8167678300455235e-05, "loss": 0.3048, "num_tokens": 827289680.0, "step": 4333 }, { "epoch": 1.4792626728110598, "grad_norm": 0.22204606764379872, "learning_rate": 2.8161355589276682e-05, "loss": 0.3188, "num_tokens": 827467799.0, "step": 4334 }, { "epoch": 1.4796040279911247, "grad_norm": 0.21510185228670547, "learning_rate": 2.815503287809813e-05, "loss": 0.3061, "num_tokens": 827700923.0, "step": 4335 }, { "epoch": 1.4799453831711897, "grad_norm": 0.21376195534928044, "learning_rate": 2.814871016691958e-05, "loss": 0.3012, "num_tokens": 827894313.0, "step": 4336 }, { "epoch": 1.4802867383512543, "grad_norm": 0.22675507428962732, "learning_rate": 2.814238745574102e-05, "loss": 0.3348, "num_tokens": 828090065.0, "step": 4337 }, { "epoch": 1.4806280935313194, "grad_norm": 0.2320476669623698, "learning_rate": 2.8136064744562467e-05, "loss": 0.2954, "num_tokens": 828251134.0, "step": 4338 }, { "epoch": 1.4809694487113843, "grad_norm": 0.19984250554591282, "learning_rate": 2.8129742033383914e-05, "loss": 0.326, "num_tokens": 828474611.0, "step": 4339 }, { "epoch": 1.481310803891449, "grad_norm": 0.24542870335651612, "learning_rate": 2.8123419322205364e-05, "loss": 0.3038, "num_tokens": 828650001.0, "step": 4340 }, { "epoch": 1.481652159071514, "grad_norm": 0.1887835940766081, "learning_rate": 2.811709661102681e-05, "loss": 0.3437, "num_tokens": 828845318.0, "step": 4341 }, { "epoch": 1.4819935142515788, "grad_norm": 0.22423974169223876, "learning_rate": 2.811077389984826e-05, "loss": 0.3307, "num_tokens": 829064717.0, "step": 4342 }, { "epoch": 1.4823348694316436, "grad_norm": 0.20391762334320357, "learning_rate": 2.8104451188669702e-05, "loss": 0.3269, "num_tokens": 829257902.0, "step": 4343 }, { "epoch": 1.4826762246117084, "grad_norm": 0.22463602571776287, "learning_rate": 2.809812847749115e-05, "loss": 0.3566, "num_tokens": 829465040.0, "step": 4344 }, { "epoch": 1.4830175797917733, "grad_norm": 0.29434914984519367, "learning_rate": 2.8091805766312596e-05, "loss": 0.2974, "num_tokens": 829629048.0, "step": 4345 }, { "epoch": 1.4833589349718381, "grad_norm": 0.22545389947673164, "learning_rate": 2.8085483055134043e-05, "loss": 0.2979, "num_tokens": 829828610.0, "step": 4346 }, { "epoch": 1.4837002901519032, "grad_norm": 0.22499823548881598, "learning_rate": 2.807916034395549e-05, "loss": 0.2861, "num_tokens": 829962579.0, "step": 4347 }, { "epoch": 1.4840416453319678, "grad_norm": 0.23287607605976746, "learning_rate": 2.8072837632776937e-05, "loss": 0.2978, "num_tokens": 830115814.0, "step": 4348 }, { "epoch": 1.4843830005120329, "grad_norm": 0.2148011207912305, "learning_rate": 2.806651492159838e-05, "loss": 0.3089, "num_tokens": 830322452.0, "step": 4349 }, { "epoch": 1.4847243556920977, "grad_norm": 0.2290490533138267, "learning_rate": 2.8060192210419827e-05, "loss": 0.2962, "num_tokens": 830503465.0, "step": 4350 }, { "epoch": 1.4850657108721625, "grad_norm": 0.2040486562664313, "learning_rate": 2.8053869499241274e-05, "loss": 0.2796, "num_tokens": 830661852.0, "step": 4351 }, { "epoch": 1.4854070660522274, "grad_norm": 0.2189585779126803, "learning_rate": 2.804754678806272e-05, "loss": 0.279, "num_tokens": 830817484.0, "step": 4352 }, { "epoch": 1.4857484212322922, "grad_norm": 0.24216984464466715, "learning_rate": 2.8041224076884172e-05, "loss": 0.2978, "num_tokens": 830988888.0, "step": 4353 }, { "epoch": 1.486089776412357, "grad_norm": 0.2042919979545537, "learning_rate": 2.803490136570562e-05, "loss": 0.308, "num_tokens": 831167895.0, "step": 4354 }, { "epoch": 1.486431131592422, "grad_norm": 0.218904201730901, "learning_rate": 2.802857865452706e-05, "loss": 0.2913, "num_tokens": 831370040.0, "step": 4355 }, { "epoch": 1.4867724867724867, "grad_norm": 0.22023219405056746, "learning_rate": 2.802225594334851e-05, "loss": 0.289, "num_tokens": 831561138.0, "step": 4356 }, { "epoch": 1.4871138419525516, "grad_norm": 0.21328895373142423, "learning_rate": 2.8015933232169956e-05, "loss": 0.3569, "num_tokens": 831805300.0, "step": 4357 }, { "epoch": 1.4874551971326164, "grad_norm": 0.22348080146663932, "learning_rate": 2.8009610520991403e-05, "loss": 0.3065, "num_tokens": 831977827.0, "step": 4358 }, { "epoch": 1.4877965523126813, "grad_norm": 0.23008544801994868, "learning_rate": 2.800328780981285e-05, "loss": 0.3246, "num_tokens": 832205842.0, "step": 4359 }, { "epoch": 1.4881379074927463, "grad_norm": 0.21731442974485904, "learning_rate": 2.7996965098634297e-05, "loss": 0.3053, "num_tokens": 832380807.0, "step": 4360 }, { "epoch": 1.488479262672811, "grad_norm": 0.2411049917689487, "learning_rate": 2.799064238745574e-05, "loss": 0.313, "num_tokens": 832575904.0, "step": 4361 }, { "epoch": 1.488820617852876, "grad_norm": 0.21653314401138227, "learning_rate": 2.7984319676277188e-05, "loss": 0.2978, "num_tokens": 832744375.0, "step": 4362 }, { "epoch": 1.4891619730329408, "grad_norm": 0.2399345108240723, "learning_rate": 2.7977996965098635e-05, "loss": 0.3066, "num_tokens": 832927439.0, "step": 4363 }, { "epoch": 1.4895033282130057, "grad_norm": 0.22128837184169928, "learning_rate": 2.7971674253920082e-05, "loss": 0.2768, "num_tokens": 833123746.0, "step": 4364 }, { "epoch": 1.4898446833930705, "grad_norm": 0.2147091455012306, "learning_rate": 2.796535154274153e-05, "loss": 0.3248, "num_tokens": 833349562.0, "step": 4365 }, { "epoch": 1.4901860385731354, "grad_norm": 0.21425956270318575, "learning_rate": 2.795902883156298e-05, "loss": 0.3124, "num_tokens": 833543626.0, "step": 4366 }, { "epoch": 1.4905273937532002, "grad_norm": 0.20792892819124953, "learning_rate": 2.795270612038442e-05, "loss": 0.3149, "num_tokens": 833738844.0, "step": 4367 }, { "epoch": 1.490868748933265, "grad_norm": 0.2311770295387851, "learning_rate": 2.7946383409205866e-05, "loss": 0.2834, "num_tokens": 833918341.0, "step": 4368 }, { "epoch": 1.4912101041133299, "grad_norm": 0.22669882765704194, "learning_rate": 2.7940060698027313e-05, "loss": 0.3173, "num_tokens": 834100541.0, "step": 4369 }, { "epoch": 1.4915514592933947, "grad_norm": 0.21683582642149068, "learning_rate": 2.7933737986848764e-05, "loss": 0.2881, "num_tokens": 834307744.0, "step": 4370 }, { "epoch": 1.4918928144734596, "grad_norm": 0.19807633348425846, "learning_rate": 2.792741527567021e-05, "loss": 0.3039, "num_tokens": 834530289.0, "step": 4371 }, { "epoch": 1.4922341696535244, "grad_norm": 0.21977410888328697, "learning_rate": 2.7921092564491658e-05, "loss": 0.3103, "num_tokens": 834708002.0, "step": 4372 }, { "epoch": 1.4925755248335895, "grad_norm": 0.21652659030772325, "learning_rate": 2.79147698533131e-05, "loss": 0.3242, "num_tokens": 834909525.0, "step": 4373 }, { "epoch": 1.492916880013654, "grad_norm": 0.221909395348342, "learning_rate": 2.7908447142134548e-05, "loss": 0.3306, "num_tokens": 835096671.0, "step": 4374 }, { "epoch": 1.4932582351937191, "grad_norm": 0.2331034774545193, "learning_rate": 2.7902124430955995e-05, "loss": 0.3745, "num_tokens": 835279195.0, "step": 4375 }, { "epoch": 1.493599590373784, "grad_norm": 0.24682924107664417, "learning_rate": 2.7895801719777442e-05, "loss": 0.315, "num_tokens": 835464847.0, "step": 4376 }, { "epoch": 1.4939409455538488, "grad_norm": 0.21260137325692646, "learning_rate": 2.788947900859889e-05, "loss": 0.3143, "num_tokens": 835641433.0, "step": 4377 }, { "epoch": 1.4942823007339137, "grad_norm": 0.23640814068140006, "learning_rate": 2.7883156297420336e-05, "loss": 0.3118, "num_tokens": 835869632.0, "step": 4378 }, { "epoch": 1.4946236559139785, "grad_norm": 0.22879014622837163, "learning_rate": 2.787683358624178e-05, "loss": 0.3371, "num_tokens": 836064308.0, "step": 4379 }, { "epoch": 1.4949650110940433, "grad_norm": 0.2062166263029607, "learning_rate": 2.7870510875063227e-05, "loss": 0.2895, "num_tokens": 836295097.0, "step": 4380 }, { "epoch": 1.4953063662741082, "grad_norm": 0.2088164206528048, "learning_rate": 2.7864188163884674e-05, "loss": 0.2899, "num_tokens": 836502738.0, "step": 4381 }, { "epoch": 1.495647721454173, "grad_norm": 0.22673347486043405, "learning_rate": 2.785786545270612e-05, "loss": 0.314, "num_tokens": 836692852.0, "step": 4382 }, { "epoch": 1.4959890766342379, "grad_norm": 0.21799508004171297, "learning_rate": 2.785154274152757e-05, "loss": 0.3366, "num_tokens": 836908804.0, "step": 4383 }, { "epoch": 1.496330431814303, "grad_norm": 0.2117140380143499, "learning_rate": 2.7845220030349018e-05, "loss": 0.3191, "num_tokens": 837089925.0, "step": 4384 }, { "epoch": 1.4966717869943675, "grad_norm": 0.2269198677093981, "learning_rate": 2.7838897319170458e-05, "loss": 0.2942, "num_tokens": 837261108.0, "step": 4385 }, { "epoch": 1.4970131421744326, "grad_norm": 0.22860827029588388, "learning_rate": 2.783257460799191e-05, "loss": 0.3025, "num_tokens": 837490578.0, "step": 4386 }, { "epoch": 1.4973544973544972, "grad_norm": 0.18164923292170063, "learning_rate": 2.7826251896813356e-05, "loss": 0.2919, "num_tokens": 837659600.0, "step": 4387 }, { "epoch": 1.4976958525345623, "grad_norm": 0.21795234473429032, "learning_rate": 2.7819929185634803e-05, "loss": 0.3278, "num_tokens": 837859432.0, "step": 4388 }, { "epoch": 1.4980372077146271, "grad_norm": 0.22555710906161736, "learning_rate": 2.781360647445625e-05, "loss": 0.2985, "num_tokens": 838079263.0, "step": 4389 }, { "epoch": 1.498378562894692, "grad_norm": 0.21006563351338037, "learning_rate": 2.7807283763277697e-05, "loss": 0.3022, "num_tokens": 838304087.0, "step": 4390 }, { "epoch": 1.4987199180747568, "grad_norm": 0.21606146841240548, "learning_rate": 2.780096105209914e-05, "loss": 0.3007, "num_tokens": 838458543.0, "step": 4391 }, { "epoch": 1.4990612732548216, "grad_norm": 0.22334811142311523, "learning_rate": 2.7794638340920587e-05, "loss": 0.3337, "num_tokens": 838679772.0, "step": 4392 }, { "epoch": 1.4994026284348865, "grad_norm": 0.216075053942465, "learning_rate": 2.7788315629742034e-05, "loss": 0.305, "num_tokens": 838861998.0, "step": 4393 }, { "epoch": 1.4997439836149513, "grad_norm": 0.21745832488681527, "learning_rate": 2.778199291856348e-05, "loss": 0.288, "num_tokens": 839065487.0, "step": 4394 }, { "epoch": 1.5000853387950164, "grad_norm": 0.21098943379452406, "learning_rate": 2.7775670207384928e-05, "loss": 0.3326, "num_tokens": 839303498.0, "step": 4395 }, { "epoch": 1.500426693975081, "grad_norm": 0.19956237215606376, "learning_rate": 2.776934749620638e-05, "loss": 0.3252, "num_tokens": 839498812.0, "step": 4396 }, { "epoch": 1.500768049155146, "grad_norm": 0.23018612162920976, "learning_rate": 2.776302478502782e-05, "loss": 0.3013, "num_tokens": 839695915.0, "step": 4397 }, { "epoch": 1.5011094043352107, "grad_norm": 0.19611879196672852, "learning_rate": 2.7756702073849266e-05, "loss": 0.3144, "num_tokens": 839917975.0, "step": 4398 }, { "epoch": 1.5014507595152757, "grad_norm": 0.2104156264318518, "learning_rate": 2.7750379362670713e-05, "loss": 0.3104, "num_tokens": 840134748.0, "step": 4399 }, { "epoch": 1.5017921146953404, "grad_norm": 0.21357910925895943, "learning_rate": 2.7744056651492163e-05, "loss": 0.298, "num_tokens": 840311312.0, "step": 4400 }, { "epoch": 1.5021334698754054, "grad_norm": 0.2314787192282847, "learning_rate": 2.773773394031361e-05, "loss": 0.3064, "num_tokens": 840469359.0, "step": 4401 }, { "epoch": 1.5024748250554703, "grad_norm": 0.22647179018783742, "learning_rate": 2.7731411229135057e-05, "loss": 0.3346, "num_tokens": 840686921.0, "step": 4402 }, { "epoch": 1.502816180235535, "grad_norm": 0.20532958015254682, "learning_rate": 2.77250885179565e-05, "loss": 0.3008, "num_tokens": 840854502.0, "step": 4403 }, { "epoch": 1.5031575354156, "grad_norm": 0.24957892553278285, "learning_rate": 2.7718765806777948e-05, "loss": 0.3149, "num_tokens": 841023019.0, "step": 4404 }, { "epoch": 1.5034988905956648, "grad_norm": 0.20386655897416775, "learning_rate": 2.7712443095599395e-05, "loss": 0.2822, "num_tokens": 841168758.0, "step": 4405 }, { "epoch": 1.5038402457757296, "grad_norm": 0.2290488842789878, "learning_rate": 2.770612038442084e-05, "loss": 0.3058, "num_tokens": 841392201.0, "step": 4406 }, { "epoch": 1.5041816009557945, "grad_norm": 0.19677449678282505, "learning_rate": 2.769979767324229e-05, "loss": 0.3004, "num_tokens": 841593822.0, "step": 4407 }, { "epoch": 1.5045229561358595, "grad_norm": 0.22006266539648786, "learning_rate": 2.7693474962063735e-05, "loss": 0.3106, "num_tokens": 841801624.0, "step": 4408 }, { "epoch": 1.5048643113159241, "grad_norm": 0.21287960786887117, "learning_rate": 2.768715225088518e-05, "loss": 0.3148, "num_tokens": 841999540.0, "step": 4409 }, { "epoch": 1.5052056664959892, "grad_norm": 0.21990189531065185, "learning_rate": 2.7680829539706626e-05, "loss": 0.2967, "num_tokens": 842183446.0, "step": 4410 }, { "epoch": 1.5055470216760538, "grad_norm": 0.2356199067043204, "learning_rate": 2.7674506828528073e-05, "loss": 0.2972, "num_tokens": 842369862.0, "step": 4411 }, { "epoch": 1.5058883768561189, "grad_norm": 0.19149381874439683, "learning_rate": 2.766818411734952e-05, "loss": 0.2732, "num_tokens": 842512881.0, "step": 4412 }, { "epoch": 1.5062297320361835, "grad_norm": 0.2399568688133383, "learning_rate": 2.766186140617097e-05, "loss": 0.3208, "num_tokens": 842720238.0, "step": 4413 }, { "epoch": 1.5065710872162486, "grad_norm": 0.21638202580922958, "learning_rate": 2.7655538694992417e-05, "loss": 0.3096, "num_tokens": 842929026.0, "step": 4414 }, { "epoch": 1.5069124423963134, "grad_norm": 0.20313056689778206, "learning_rate": 2.7649215983813858e-05, "loss": 0.3136, "num_tokens": 843126945.0, "step": 4415 }, { "epoch": 1.5072537975763782, "grad_norm": 0.22737636285892687, "learning_rate": 2.7642893272635305e-05, "loss": 0.3027, "num_tokens": 843270020.0, "step": 4416 }, { "epoch": 1.507595152756443, "grad_norm": 0.23558116289742934, "learning_rate": 2.7636570561456755e-05, "loss": 0.3026, "num_tokens": 843470052.0, "step": 4417 }, { "epoch": 1.507936507936508, "grad_norm": 0.196278095627565, "learning_rate": 2.7630247850278202e-05, "loss": 0.3018, "num_tokens": 843667775.0, "step": 4418 }, { "epoch": 1.5082778631165727, "grad_norm": 0.2054496317898529, "learning_rate": 2.762392513909965e-05, "loss": 0.3334, "num_tokens": 843864215.0, "step": 4419 }, { "epoch": 1.5086192182966376, "grad_norm": 0.20936954253032367, "learning_rate": 2.7617602427921096e-05, "loss": 0.314, "num_tokens": 844092509.0, "step": 4420 }, { "epoch": 1.5089605734767026, "grad_norm": 0.23867514861738154, "learning_rate": 2.761127971674254e-05, "loss": 0.3035, "num_tokens": 844298314.0, "step": 4421 }, { "epoch": 1.5093019286567673, "grad_norm": 0.21011814821659844, "learning_rate": 2.7604957005563986e-05, "loss": 0.2903, "num_tokens": 844481906.0, "step": 4422 }, { "epoch": 1.5096432838368323, "grad_norm": 0.23792121642086536, "learning_rate": 2.7598634294385433e-05, "loss": 0.3082, "num_tokens": 844670198.0, "step": 4423 }, { "epoch": 1.509984639016897, "grad_norm": 0.22533139998645688, "learning_rate": 2.759231158320688e-05, "loss": 0.3216, "num_tokens": 844879972.0, "step": 4424 }, { "epoch": 1.510325994196962, "grad_norm": 0.23106843108480207, "learning_rate": 2.7585988872028327e-05, "loss": 0.2944, "num_tokens": 845054845.0, "step": 4425 }, { "epoch": 1.5106673493770268, "grad_norm": 0.23292689645825937, "learning_rate": 2.7579666160849778e-05, "loss": 0.3271, "num_tokens": 845248416.0, "step": 4426 }, { "epoch": 1.5110087045570917, "grad_norm": 0.20796017747846532, "learning_rate": 2.7573343449671218e-05, "loss": 0.3049, "num_tokens": 845452268.0, "step": 4427 }, { "epoch": 1.5113500597371565, "grad_norm": 0.4571581277336105, "learning_rate": 2.7567020738492665e-05, "loss": 0.295, "num_tokens": 845632979.0, "step": 4428 }, { "epoch": 1.5116914149172214, "grad_norm": 0.21580462424726882, "learning_rate": 2.7560698027314112e-05, "loss": 0.2996, "num_tokens": 845822187.0, "step": 4429 }, { "epoch": 1.5120327700972862, "grad_norm": 0.23477974371963584, "learning_rate": 2.7554375316135562e-05, "loss": 0.3012, "num_tokens": 846044438.0, "step": 4430 }, { "epoch": 1.512374125277351, "grad_norm": 0.22603449443535933, "learning_rate": 2.754805260495701e-05, "loss": 0.3201, "num_tokens": 846212659.0, "step": 4431 }, { "epoch": 1.512715480457416, "grad_norm": 0.19765102504819676, "learning_rate": 2.7541729893778456e-05, "loss": 0.2903, "num_tokens": 846398503.0, "step": 4432 }, { "epoch": 1.5130568356374807, "grad_norm": 0.218197032682783, "learning_rate": 2.75354071825999e-05, "loss": 0.311, "num_tokens": 846532911.0, "step": 4433 }, { "epoch": 1.5133981908175458, "grad_norm": 0.2827201382949427, "learning_rate": 2.7529084471421347e-05, "loss": 0.3342, "num_tokens": 846751533.0, "step": 4434 }, { "epoch": 1.5137395459976104, "grad_norm": 0.20428135539974948, "learning_rate": 2.7522761760242794e-05, "loss": 0.2931, "num_tokens": 846916555.0, "step": 4435 }, { "epoch": 1.5140809011776755, "grad_norm": 0.22096730187783134, "learning_rate": 2.751643904906424e-05, "loss": 0.3182, "num_tokens": 847069668.0, "step": 4436 }, { "epoch": 1.51442225635774, "grad_norm": 0.25813677358786974, "learning_rate": 2.7510116337885688e-05, "loss": 0.3034, "num_tokens": 847224915.0, "step": 4437 }, { "epoch": 1.5147636115378051, "grad_norm": 0.23082797157648013, "learning_rate": 2.7503793626707135e-05, "loss": 0.3162, "num_tokens": 847453049.0, "step": 4438 }, { "epoch": 1.51510496671787, "grad_norm": 0.19631536202103103, "learning_rate": 2.749747091552858e-05, "loss": 0.287, "num_tokens": 847618326.0, "step": 4439 }, { "epoch": 1.5154463218979348, "grad_norm": 0.2525930478405905, "learning_rate": 2.7491148204350025e-05, "loss": 0.3285, "num_tokens": 847794621.0, "step": 4440 }, { "epoch": 1.5157876770779997, "grad_norm": 0.22675466168848762, "learning_rate": 2.7484825493171472e-05, "loss": 0.2925, "num_tokens": 847975104.0, "step": 4441 }, { "epoch": 1.5161290322580645, "grad_norm": 0.23768717226987768, "learning_rate": 2.747850278199292e-05, "loss": 0.3026, "num_tokens": 848145769.0, "step": 4442 }, { "epoch": 1.5164703874381293, "grad_norm": 0.2385697087186089, "learning_rate": 2.747218007081437e-05, "loss": 0.2975, "num_tokens": 848323563.0, "step": 4443 }, { "epoch": 1.5168117426181942, "grad_norm": 0.1970473537283467, "learning_rate": 2.746585735963581e-05, "loss": 0.2904, "num_tokens": 848525383.0, "step": 4444 }, { "epoch": 1.5171530977982592, "grad_norm": 0.2374443511570927, "learning_rate": 2.7459534648457257e-05, "loss": 0.3132, "num_tokens": 848709649.0, "step": 4445 }, { "epoch": 1.5174944529783239, "grad_norm": 0.19576738921233705, "learning_rate": 2.7453211937278704e-05, "loss": 0.3395, "num_tokens": 848960147.0, "step": 4446 }, { "epoch": 1.517835808158389, "grad_norm": 0.23974060287011642, "learning_rate": 2.7446889226100154e-05, "loss": 0.3313, "num_tokens": 849144003.0, "step": 4447 }, { "epoch": 1.5181771633384535, "grad_norm": 0.2264799854978985, "learning_rate": 2.74405665149216e-05, "loss": 0.2992, "num_tokens": 849336187.0, "step": 4448 }, { "epoch": 1.5185185185185186, "grad_norm": 0.228014157496292, "learning_rate": 2.7434243803743048e-05, "loss": 0.2927, "num_tokens": 849518902.0, "step": 4449 }, { "epoch": 1.5188598736985832, "grad_norm": 0.22382674380585427, "learning_rate": 2.7427921092564492e-05, "loss": 0.3069, "num_tokens": 849726486.0, "step": 4450 }, { "epoch": 1.5192012288786483, "grad_norm": 0.2048265563307519, "learning_rate": 2.742159838138594e-05, "loss": 0.2945, "num_tokens": 849922995.0, "step": 4451 }, { "epoch": 1.5195425840587131, "grad_norm": 0.19186608392765753, "learning_rate": 2.7415275670207386e-05, "loss": 0.3036, "num_tokens": 850176662.0, "step": 4452 }, { "epoch": 1.519883939238778, "grad_norm": 0.20195023479217145, "learning_rate": 2.7408952959028833e-05, "loss": 0.3109, "num_tokens": 850361547.0, "step": 4453 }, { "epoch": 1.5202252944188428, "grad_norm": 0.23663941787622636, "learning_rate": 2.740263024785028e-05, "loss": 0.3008, "num_tokens": 850518154.0, "step": 4454 }, { "epoch": 1.5205666495989076, "grad_norm": 0.23100447205124694, "learning_rate": 2.7396307536671727e-05, "loss": 0.2989, "num_tokens": 850687937.0, "step": 4455 }, { "epoch": 1.5209080047789725, "grad_norm": 0.2101216213942011, "learning_rate": 2.738998482549317e-05, "loss": 0.3135, "num_tokens": 850894474.0, "step": 4456 }, { "epoch": 1.5212493599590373, "grad_norm": 0.18900334496893306, "learning_rate": 2.7383662114314617e-05, "loss": 0.3167, "num_tokens": 851083093.0, "step": 4457 }, { "epoch": 1.5215907151391024, "grad_norm": 1.9714456124533175, "learning_rate": 2.7377339403136064e-05, "loss": 0.3222, "num_tokens": 851261318.0, "step": 4458 }, { "epoch": 1.521932070319167, "grad_norm": 0.30269579251864753, "learning_rate": 2.737101669195751e-05, "loss": 0.3111, "num_tokens": 851450558.0, "step": 4459 }, { "epoch": 1.522273425499232, "grad_norm": 0.22095303571021677, "learning_rate": 2.736469398077896e-05, "loss": 0.2934, "num_tokens": 851620260.0, "step": 4460 }, { "epoch": 1.5226147806792967, "grad_norm": 0.20975682051251965, "learning_rate": 2.735837126960041e-05, "loss": 0.334, "num_tokens": 851847837.0, "step": 4461 }, { "epoch": 1.5229561358593617, "grad_norm": 0.21545033699563707, "learning_rate": 2.735204855842185e-05, "loss": 0.3115, "num_tokens": 852046166.0, "step": 4462 }, { "epoch": 1.5232974910394266, "grad_norm": 0.21170776241041445, "learning_rate": 2.73457258472433e-05, "loss": 0.2877, "num_tokens": 852220602.0, "step": 4463 }, { "epoch": 1.5236388462194914, "grad_norm": 0.20210673487447392, "learning_rate": 2.7339403136064746e-05, "loss": 0.3151, "num_tokens": 852422817.0, "step": 4464 }, { "epoch": 1.5239802013995563, "grad_norm": 0.2143735545533872, "learning_rate": 2.7333080424886193e-05, "loss": 0.3353, "num_tokens": 852599712.0, "step": 4465 }, { "epoch": 1.524321556579621, "grad_norm": 0.2430655701937923, "learning_rate": 2.732675771370764e-05, "loss": 0.3183, "num_tokens": 852819910.0, "step": 4466 }, { "epoch": 1.524662911759686, "grad_norm": 0.23187133022554396, "learning_rate": 2.7320435002529087e-05, "loss": 0.3201, "num_tokens": 853019051.0, "step": 4467 }, { "epoch": 1.5250042669397508, "grad_norm": 0.2199604172320008, "learning_rate": 2.731411229135053e-05, "loss": 0.2906, "num_tokens": 853194593.0, "step": 4468 }, { "epoch": 1.5253456221198156, "grad_norm": 0.20748970224530963, "learning_rate": 2.7307789580171978e-05, "loss": 0.3132, "num_tokens": 853376401.0, "step": 4469 }, { "epoch": 1.5256869772998805, "grad_norm": 0.26386011535588094, "learning_rate": 2.7301466868993425e-05, "loss": 0.3237, "num_tokens": 853533026.0, "step": 4470 }, { "epoch": 1.5260283324799455, "grad_norm": 0.22987578537919653, "learning_rate": 2.7295144157814872e-05, "loss": 0.3104, "num_tokens": 853708647.0, "step": 4471 }, { "epoch": 1.5263696876600101, "grad_norm": 0.2186397934882857, "learning_rate": 2.728882144663632e-05, "loss": 0.2936, "num_tokens": 853904593.0, "step": 4472 }, { "epoch": 1.5267110428400752, "grad_norm": 0.19789063204564372, "learning_rate": 2.728249873545777e-05, "loss": 0.3388, "num_tokens": 854125423.0, "step": 4473 }, { "epoch": 1.5270523980201398, "grad_norm": 0.22750723865384975, "learning_rate": 2.727617602427921e-05, "loss": 0.3022, "num_tokens": 854306573.0, "step": 4474 }, { "epoch": 1.5273937532002049, "grad_norm": 0.29985086045003134, "learning_rate": 2.7269853313100656e-05, "loss": 0.2992, "num_tokens": 854486052.0, "step": 4475 }, { "epoch": 1.5277351083802697, "grad_norm": 0.20146557898775824, "learning_rate": 2.7263530601922103e-05, "loss": 0.3155, "num_tokens": 854675083.0, "step": 4476 }, { "epoch": 1.5280764635603346, "grad_norm": 0.22331397272084474, "learning_rate": 2.7257207890743554e-05, "loss": 0.2958, "num_tokens": 854829248.0, "step": 4477 }, { "epoch": 1.5284178187403994, "grad_norm": 0.21355251121913, "learning_rate": 2.7250885179565e-05, "loss": 0.2883, "num_tokens": 855001797.0, "step": 4478 }, { "epoch": 1.5287591739204642, "grad_norm": 0.21447004785655982, "learning_rate": 2.7244562468386448e-05, "loss": 0.2825, "num_tokens": 855193727.0, "step": 4479 }, { "epoch": 1.529100529100529, "grad_norm": 0.22272193518444838, "learning_rate": 2.723823975720789e-05, "loss": 0.3079, "num_tokens": 855394433.0, "step": 4480 }, { "epoch": 1.529441884280594, "grad_norm": 0.21156499740097356, "learning_rate": 2.7231917046029338e-05, "loss": 0.3136, "num_tokens": 855606477.0, "step": 4481 }, { "epoch": 1.529783239460659, "grad_norm": 0.20295588503652245, "learning_rate": 2.7225594334850785e-05, "loss": 0.3216, "num_tokens": 855783394.0, "step": 4482 }, { "epoch": 1.5301245946407236, "grad_norm": 0.2405087520954947, "learning_rate": 2.7219271623672232e-05, "loss": 0.3357, "num_tokens": 856004728.0, "step": 4483 }, { "epoch": 1.5304659498207887, "grad_norm": 0.20728620191739036, "learning_rate": 2.721294891249368e-05, "loss": 0.2929, "num_tokens": 856220472.0, "step": 4484 }, { "epoch": 1.5308073050008533, "grad_norm": 0.19778220386235215, "learning_rate": 2.7206626201315126e-05, "loss": 0.3124, "num_tokens": 856410451.0, "step": 4485 }, { "epoch": 1.5311486601809183, "grad_norm": 0.21945358959530678, "learning_rate": 2.720030349013657e-05, "loss": 0.3421, "num_tokens": 856617331.0, "step": 4486 }, { "epoch": 1.531490015360983, "grad_norm": 0.22875368320757392, "learning_rate": 2.7193980778958017e-05, "loss": 0.3151, "num_tokens": 856841519.0, "step": 4487 }, { "epoch": 1.531831370541048, "grad_norm": 0.21270254724384816, "learning_rate": 2.7187658067779464e-05, "loss": 0.3156, "num_tokens": 857050122.0, "step": 4488 }, { "epoch": 1.5321727257211128, "grad_norm": 0.19390938711817274, "learning_rate": 2.718133535660091e-05, "loss": 0.291, "num_tokens": 857240807.0, "step": 4489 }, { "epoch": 1.5325140809011777, "grad_norm": 0.21857443051202038, "learning_rate": 2.717501264542236e-05, "loss": 0.3164, "num_tokens": 857415362.0, "step": 4490 }, { "epoch": 1.5328554360812425, "grad_norm": 0.2346986051267861, "learning_rate": 2.7168689934243808e-05, "loss": 0.3126, "num_tokens": 857605624.0, "step": 4491 }, { "epoch": 1.5331967912613074, "grad_norm": 0.2119033245659515, "learning_rate": 2.7162367223065248e-05, "loss": 0.302, "num_tokens": 857770548.0, "step": 4492 }, { "epoch": 1.5335381464413722, "grad_norm": 0.2431635632833372, "learning_rate": 2.7156044511886695e-05, "loss": 0.2857, "num_tokens": 857948942.0, "step": 4493 }, { "epoch": 1.533879501621437, "grad_norm": 0.20962398715649186, "learning_rate": 2.7149721800708146e-05, "loss": 0.2961, "num_tokens": 858152628.0, "step": 4494 }, { "epoch": 1.534220856801502, "grad_norm": 0.19852358072769166, "learning_rate": 2.7143399089529593e-05, "loss": 0.3104, "num_tokens": 858335585.0, "step": 4495 }, { "epoch": 1.5345622119815667, "grad_norm": 0.228802161654571, "learning_rate": 2.713707637835104e-05, "loss": 0.3404, "num_tokens": 858536446.0, "step": 4496 }, { "epoch": 1.5349035671616318, "grad_norm": 0.21699758853137463, "learning_rate": 2.7130753667172486e-05, "loss": 0.3408, "num_tokens": 858748285.0, "step": 4497 }, { "epoch": 1.5352449223416964, "grad_norm": 0.22230062368264594, "learning_rate": 2.712443095599393e-05, "loss": 0.3151, "num_tokens": 858925703.0, "step": 4498 }, { "epoch": 1.5355862775217615, "grad_norm": 0.20658519322512536, "learning_rate": 2.7118108244815377e-05, "loss": 0.2955, "num_tokens": 859118322.0, "step": 4499 }, { "epoch": 1.535927632701826, "grad_norm": 0.2273548925327679, "learning_rate": 2.7111785533636824e-05, "loss": 0.2996, "num_tokens": 859312217.0, "step": 4500 }, { "epoch": 1.5362689878818911, "grad_norm": 0.20406829978209745, "learning_rate": 2.710546282245827e-05, "loss": 0.3028, "num_tokens": 859501420.0, "step": 4501 }, { "epoch": 1.536610343061956, "grad_norm": 0.21264856391835127, "learning_rate": 2.7099140111279718e-05, "loss": 0.2904, "num_tokens": 859691574.0, "step": 4502 }, { "epoch": 1.5369516982420208, "grad_norm": 0.2009830739975077, "learning_rate": 2.709281740010117e-05, "loss": 0.344, "num_tokens": 859927512.0, "step": 4503 }, { "epoch": 1.5372930534220857, "grad_norm": 0.2106441430517505, "learning_rate": 2.708649468892261e-05, "loss": 0.3188, "num_tokens": 860180453.0, "step": 4504 }, { "epoch": 1.5376344086021505, "grad_norm": 0.2812723726137917, "learning_rate": 2.7080171977744056e-05, "loss": 0.3256, "num_tokens": 860347434.0, "step": 4505 }, { "epoch": 1.5379757637822153, "grad_norm": 0.28267463076204574, "learning_rate": 2.7073849266565503e-05, "loss": 0.3054, "num_tokens": 860557641.0, "step": 4506 }, { "epoch": 1.5383171189622802, "grad_norm": 0.2182558199796126, "learning_rate": 2.7067526555386953e-05, "loss": 0.304, "num_tokens": 860718953.0, "step": 4507 }, { "epoch": 1.5386584741423452, "grad_norm": 0.22514629004028833, "learning_rate": 2.70612038442084e-05, "loss": 0.3004, "num_tokens": 860888439.0, "step": 4508 }, { "epoch": 1.5389998293224099, "grad_norm": 0.21805579861762087, "learning_rate": 2.7054881133029847e-05, "loss": 0.2901, "num_tokens": 861044317.0, "step": 4509 }, { "epoch": 1.539341184502475, "grad_norm": 0.21407552285857526, "learning_rate": 2.704855842185129e-05, "loss": 0.2905, "num_tokens": 861250511.0, "step": 4510 }, { "epoch": 1.5396825396825395, "grad_norm": 0.20977383730618246, "learning_rate": 2.7042235710672737e-05, "loss": 0.3151, "num_tokens": 861449593.0, "step": 4511 }, { "epoch": 1.5400238948626046, "grad_norm": 0.20122811754382214, "learning_rate": 2.7035912999494184e-05, "loss": 0.2751, "num_tokens": 861635888.0, "step": 4512 }, { "epoch": 1.5403652500426694, "grad_norm": 0.19081928382348273, "learning_rate": 2.702959028831563e-05, "loss": 0.3072, "num_tokens": 861842284.0, "step": 4513 }, { "epoch": 1.5407066052227343, "grad_norm": 0.23446054371093963, "learning_rate": 2.702326757713708e-05, "loss": 0.3207, "num_tokens": 862014822.0, "step": 4514 }, { "epoch": 1.5410479604027991, "grad_norm": 0.2124118999764032, "learning_rate": 2.7016944865958525e-05, "loss": 0.3332, "num_tokens": 862243806.0, "step": 4515 }, { "epoch": 1.541389315582864, "grad_norm": 0.2172661773945638, "learning_rate": 2.701062215477997e-05, "loss": 0.3233, "num_tokens": 862447973.0, "step": 4516 }, { "epoch": 1.5417306707629288, "grad_norm": 0.22317924788380553, "learning_rate": 2.7004299443601416e-05, "loss": 0.287, "num_tokens": 862627641.0, "step": 4517 }, { "epoch": 1.5420720259429936, "grad_norm": 0.20955105353972575, "learning_rate": 2.6997976732422863e-05, "loss": 0.3089, "num_tokens": 862800104.0, "step": 4518 }, { "epoch": 1.5424133811230587, "grad_norm": 0.2293764877467675, "learning_rate": 2.699165402124431e-05, "loss": 0.3002, "num_tokens": 863018287.0, "step": 4519 }, { "epoch": 1.5427547363031233, "grad_norm": 0.20443947030948176, "learning_rate": 2.698533131006576e-05, "loss": 0.322, "num_tokens": 863205257.0, "step": 4520 }, { "epoch": 1.5430960914831884, "grad_norm": 0.2137684387407668, "learning_rate": 2.6979008598887207e-05, "loss": 0.2944, "num_tokens": 863391150.0, "step": 4521 }, { "epoch": 1.543437446663253, "grad_norm": 0.23293308360231021, "learning_rate": 2.6972685887708648e-05, "loss": 0.297, "num_tokens": 863602164.0, "step": 4522 }, { "epoch": 1.543778801843318, "grad_norm": 0.20363369798501044, "learning_rate": 2.6966363176530095e-05, "loss": 0.3091, "num_tokens": 863818704.0, "step": 4523 }, { "epoch": 1.5441201570233827, "grad_norm": 0.21626005222410588, "learning_rate": 2.6960040465351545e-05, "loss": 0.3132, "num_tokens": 864012002.0, "step": 4524 }, { "epoch": 1.5444615122034477, "grad_norm": 0.2125224960736765, "learning_rate": 2.6953717754172992e-05, "loss": 0.3007, "num_tokens": 864203281.0, "step": 4525 }, { "epoch": 1.5448028673835126, "grad_norm": 0.22870446026508243, "learning_rate": 2.694739504299444e-05, "loss": 0.3022, "num_tokens": 864399710.0, "step": 4526 }, { "epoch": 1.5451442225635774, "grad_norm": 0.21917863457433762, "learning_rate": 2.6941072331815886e-05, "loss": 0.3133, "num_tokens": 864632190.0, "step": 4527 }, { "epoch": 1.5454855777436423, "grad_norm": 0.21449468355513143, "learning_rate": 2.693474962063733e-05, "loss": 0.3051, "num_tokens": 864797277.0, "step": 4528 }, { "epoch": 1.545826932923707, "grad_norm": 0.2186446389684702, "learning_rate": 2.6928426909458776e-05, "loss": 0.3329, "num_tokens": 864979172.0, "step": 4529 }, { "epoch": 1.546168288103772, "grad_norm": 0.22342025062208382, "learning_rate": 2.6922104198280223e-05, "loss": 0.3024, "num_tokens": 865159447.0, "step": 4530 }, { "epoch": 1.5465096432838368, "grad_norm": 0.211541013908762, "learning_rate": 2.691578148710167e-05, "loss": 0.3067, "num_tokens": 865371751.0, "step": 4531 }, { "epoch": 1.5468509984639018, "grad_norm": 0.2067336122226662, "learning_rate": 2.6909458775923117e-05, "loss": 0.2909, "num_tokens": 865560490.0, "step": 4532 }, { "epoch": 1.5471923536439665, "grad_norm": 0.21922669811081888, "learning_rate": 2.6903136064744568e-05, "loss": 0.2999, "num_tokens": 865740544.0, "step": 4533 }, { "epoch": 1.5475337088240315, "grad_norm": 0.21345857236340887, "learning_rate": 2.6896813353566008e-05, "loss": 0.3376, "num_tokens": 865938303.0, "step": 4534 }, { "epoch": 1.5478750640040961, "grad_norm": 0.19899448843116332, "learning_rate": 2.6890490642387455e-05, "loss": 0.3035, "num_tokens": 866137236.0, "step": 4535 }, { "epoch": 1.5482164191841612, "grad_norm": 0.23312522503860977, "learning_rate": 2.6884167931208902e-05, "loss": 0.3151, "num_tokens": 866317444.0, "step": 4536 }, { "epoch": 1.5485577743642258, "grad_norm": 0.23718262022530393, "learning_rate": 2.6877845220030352e-05, "loss": 0.3265, "num_tokens": 866505790.0, "step": 4537 }, { "epoch": 1.5488991295442909, "grad_norm": 0.2605877482810698, "learning_rate": 2.68715225088518e-05, "loss": 0.3116, "num_tokens": 866670029.0, "step": 4538 }, { "epoch": 1.5492404847243557, "grad_norm": 0.24246680610529608, "learning_rate": 2.6865199797673246e-05, "loss": 0.3432, "num_tokens": 866873399.0, "step": 4539 }, { "epoch": 1.5495818399044206, "grad_norm": 0.20757524967976917, "learning_rate": 2.685887708649469e-05, "loss": 0.3328, "num_tokens": 867070947.0, "step": 4540 }, { "epoch": 1.5499231950844854, "grad_norm": 0.19646905930771077, "learning_rate": 2.6852554375316137e-05, "loss": 0.3135, "num_tokens": 867263526.0, "step": 4541 }, { "epoch": 1.5502645502645502, "grad_norm": 0.2377311291224878, "learning_rate": 2.6846231664137584e-05, "loss": 0.3304, "num_tokens": 867452992.0, "step": 4542 }, { "epoch": 1.550605905444615, "grad_norm": 0.2547632551253583, "learning_rate": 2.683990895295903e-05, "loss": 0.3053, "num_tokens": 867622827.0, "step": 4543 }, { "epoch": 1.55094726062468, "grad_norm": 0.21249275740565682, "learning_rate": 2.6833586241780478e-05, "loss": 0.309, "num_tokens": 867795138.0, "step": 4544 }, { "epoch": 1.551288615804745, "grad_norm": 0.21113707535153264, "learning_rate": 2.6827263530601925e-05, "loss": 0.3042, "num_tokens": 867972041.0, "step": 4545 }, { "epoch": 1.5516299709848096, "grad_norm": 0.23676208321814046, "learning_rate": 2.682094081942337e-05, "loss": 0.2962, "num_tokens": 868163033.0, "step": 4546 }, { "epoch": 1.5519713261648747, "grad_norm": 0.1959390282338254, "learning_rate": 2.6814618108244815e-05, "loss": 0.3233, "num_tokens": 868340863.0, "step": 4547 }, { "epoch": 1.5523126813449393, "grad_norm": 0.20838492622893837, "learning_rate": 2.6808295397066262e-05, "loss": 0.3431, "num_tokens": 868570973.0, "step": 4548 }, { "epoch": 1.5526540365250043, "grad_norm": 0.20574828410554769, "learning_rate": 2.680197268588771e-05, "loss": 0.3143, "num_tokens": 868781481.0, "step": 4549 }, { "epoch": 1.5529953917050692, "grad_norm": 0.21751204983488517, "learning_rate": 2.679564997470916e-05, "loss": 0.3164, "num_tokens": 868994627.0, "step": 4550 }, { "epoch": 1.553336746885134, "grad_norm": 0.20888679704126734, "learning_rate": 2.6789327263530607e-05, "loss": 0.2945, "num_tokens": 869182507.0, "step": 4551 }, { "epoch": 1.5536781020651989, "grad_norm": 0.21686765790764348, "learning_rate": 2.6783004552352047e-05, "loss": 0.3052, "num_tokens": 869362824.0, "step": 4552 }, { "epoch": 1.5540194572452637, "grad_norm": 0.21369676777136354, "learning_rate": 2.6776681841173494e-05, "loss": 0.3453, "num_tokens": 869567761.0, "step": 4553 }, { "epoch": 1.5543608124253285, "grad_norm": 0.2055214614439539, "learning_rate": 2.6770359129994944e-05, "loss": 0.2884, "num_tokens": 869758365.0, "step": 4554 }, { "epoch": 1.5547021676053934, "grad_norm": 0.21890460065161782, "learning_rate": 2.676403641881639e-05, "loss": 0.323, "num_tokens": 869951733.0, "step": 4555 }, { "epoch": 1.5550435227854584, "grad_norm": 0.22084374697764714, "learning_rate": 2.6757713707637838e-05, "loss": 0.2948, "num_tokens": 870158627.0, "step": 4556 }, { "epoch": 1.555384877965523, "grad_norm": 0.18926218805003966, "learning_rate": 2.6751390996459285e-05, "loss": 0.3369, "num_tokens": 870357787.0, "step": 4557 }, { "epoch": 1.5557262331455881, "grad_norm": 0.2376671258039561, "learning_rate": 2.674506828528073e-05, "loss": 0.2845, "num_tokens": 870569884.0, "step": 4558 }, { "epoch": 1.5560675883256527, "grad_norm": 0.19979459218541287, "learning_rate": 2.6738745574102176e-05, "loss": 0.2968, "num_tokens": 870749984.0, "step": 4559 }, { "epoch": 1.5564089435057178, "grad_norm": 0.19886473646600306, "learning_rate": 2.6732422862923623e-05, "loss": 0.3052, "num_tokens": 870979623.0, "step": 4560 }, { "epoch": 1.5567502986857824, "grad_norm": 0.21527938287957604, "learning_rate": 2.672610015174507e-05, "loss": 0.2756, "num_tokens": 871175252.0, "step": 4561 }, { "epoch": 1.5570916538658475, "grad_norm": 0.19484880736053908, "learning_rate": 2.6719777440566517e-05, "loss": 0.3231, "num_tokens": 871412339.0, "step": 4562 }, { "epoch": 1.5574330090459123, "grad_norm": 0.22151859403967744, "learning_rate": 2.6713454729387967e-05, "loss": 0.3211, "num_tokens": 871623897.0, "step": 4563 }, { "epoch": 1.5577743642259771, "grad_norm": 0.19941288727095086, "learning_rate": 2.6707132018209407e-05, "loss": 0.3305, "num_tokens": 871849648.0, "step": 4564 }, { "epoch": 1.558115719406042, "grad_norm": 0.20720504178899865, "learning_rate": 2.6700809307030854e-05, "loss": 0.32, "num_tokens": 872043176.0, "step": 4565 }, { "epoch": 1.5584570745861068, "grad_norm": 0.214378677717043, "learning_rate": 2.66944865958523e-05, "loss": 0.3132, "num_tokens": 872243786.0, "step": 4566 }, { "epoch": 1.5587984297661717, "grad_norm": 0.2170156047642682, "learning_rate": 2.668816388467375e-05, "loss": 0.2978, "num_tokens": 872425102.0, "step": 4567 }, { "epoch": 1.5591397849462365, "grad_norm": 0.22059162657005701, "learning_rate": 2.66818411734952e-05, "loss": 0.2935, "num_tokens": 872581561.0, "step": 4568 }, { "epoch": 1.5594811401263016, "grad_norm": 0.22182214208524284, "learning_rate": 2.6675518462316646e-05, "loss": 0.3087, "num_tokens": 872763349.0, "step": 4569 }, { "epoch": 1.5598224953063662, "grad_norm": 0.20317799533454048, "learning_rate": 2.6669195751138086e-05, "loss": 0.3158, "num_tokens": 872973401.0, "step": 4570 }, { "epoch": 1.5601638504864312, "grad_norm": 0.23950958903690694, "learning_rate": 2.6662873039959536e-05, "loss": 0.3039, "num_tokens": 873153607.0, "step": 4571 }, { "epoch": 1.5605052056664959, "grad_norm": 0.1970863687439717, "learning_rate": 2.6656550328780983e-05, "loss": 0.2885, "num_tokens": 873328521.0, "step": 4572 }, { "epoch": 1.560846560846561, "grad_norm": 0.2356629429171142, "learning_rate": 2.665022761760243e-05, "loss": 0.2853, "num_tokens": 873510208.0, "step": 4573 }, { "epoch": 1.5611879160266255, "grad_norm": 0.20822803925923436, "learning_rate": 2.6643904906423877e-05, "loss": 0.3057, "num_tokens": 873711379.0, "step": 4574 }, { "epoch": 1.5615292712066906, "grad_norm": 0.25691235581026256, "learning_rate": 2.663758219524532e-05, "loss": 0.3142, "num_tokens": 873840669.0, "step": 4575 }, { "epoch": 1.5618706263867554, "grad_norm": 0.2355749768693562, "learning_rate": 2.6631259484066768e-05, "loss": 0.3077, "num_tokens": 874034004.0, "step": 4576 }, { "epoch": 1.5622119815668203, "grad_norm": 0.24691960462278817, "learning_rate": 2.6624936772888215e-05, "loss": 0.3209, "num_tokens": 874224290.0, "step": 4577 }, { "epoch": 1.5625533367468851, "grad_norm": 0.2179263997274666, "learning_rate": 2.661861406170966e-05, "loss": 0.3155, "num_tokens": 874421336.0, "step": 4578 }, { "epoch": 1.56289469192695, "grad_norm": 0.24559687617531106, "learning_rate": 2.661229135053111e-05, "loss": 0.3055, "num_tokens": 874582936.0, "step": 4579 }, { "epoch": 1.5632360471070148, "grad_norm": 0.20617864022913637, "learning_rate": 2.660596863935256e-05, "loss": 0.301, "num_tokens": 874814184.0, "step": 4580 }, { "epoch": 1.5635774022870796, "grad_norm": 0.25751786887103484, "learning_rate": 2.6599645928174e-05, "loss": 0.3278, "num_tokens": 875012609.0, "step": 4581 }, { "epoch": 1.5639187574671447, "grad_norm": 0.22810466347396785, "learning_rate": 2.6593323216995446e-05, "loss": 0.3561, "num_tokens": 875203837.0, "step": 4582 }, { "epoch": 1.5642601126472093, "grad_norm": 0.23972400521606457, "learning_rate": 2.6587000505816893e-05, "loss": 0.3124, "num_tokens": 875412987.0, "step": 4583 }, { "epoch": 1.5646014678272744, "grad_norm": 0.21566268109153527, "learning_rate": 2.6580677794638344e-05, "loss": 0.2934, "num_tokens": 875585457.0, "step": 4584 }, { "epoch": 1.564942823007339, "grad_norm": 0.21184401070693595, "learning_rate": 2.657435508345979e-05, "loss": 0.3233, "num_tokens": 875800120.0, "step": 4585 }, { "epoch": 1.565284178187404, "grad_norm": 0.22736138582166, "learning_rate": 2.6568032372281237e-05, "loss": 0.3189, "num_tokens": 876012408.0, "step": 4586 }, { "epoch": 1.565625533367469, "grad_norm": 0.22722015502385615, "learning_rate": 2.656170966110268e-05, "loss": 0.3052, "num_tokens": 876175438.0, "step": 4587 }, { "epoch": 1.5659668885475337, "grad_norm": 0.21312836094807092, "learning_rate": 2.6555386949924128e-05, "loss": 0.3078, "num_tokens": 876386503.0, "step": 4588 }, { "epoch": 1.5663082437275986, "grad_norm": 0.2262254145696431, "learning_rate": 2.6549064238745575e-05, "loss": 0.3316, "num_tokens": 876583434.0, "step": 4589 }, { "epoch": 1.5666495989076634, "grad_norm": 0.23847328813273366, "learning_rate": 2.6542741527567022e-05, "loss": 0.2688, "num_tokens": 876739986.0, "step": 4590 }, { "epoch": 1.5669909540877283, "grad_norm": 0.23838644970307002, "learning_rate": 2.653641881638847e-05, "loss": 0.2962, "num_tokens": 876965364.0, "step": 4591 }, { "epoch": 1.567332309267793, "grad_norm": 0.2033599953641468, "learning_rate": 2.6530096105209916e-05, "loss": 0.2974, "num_tokens": 877143686.0, "step": 4592 }, { "epoch": 1.5676736644478582, "grad_norm": 0.22625181745289005, "learning_rate": 2.652377339403136e-05, "loss": 0.3111, "num_tokens": 877341843.0, "step": 4593 }, { "epoch": 1.5680150196279228, "grad_norm": 0.23461163808877325, "learning_rate": 2.6517450682852807e-05, "loss": 0.3026, "num_tokens": 877528364.0, "step": 4594 }, { "epoch": 1.5683563748079878, "grad_norm": 0.20225103700550795, "learning_rate": 2.6511127971674254e-05, "loss": 0.3046, "num_tokens": 877761751.0, "step": 4595 }, { "epoch": 1.5686977299880525, "grad_norm": 0.20969504421935342, "learning_rate": 2.65048052604957e-05, "loss": 0.3405, "num_tokens": 877979171.0, "step": 4596 }, { "epoch": 1.5690390851681175, "grad_norm": 0.2552923301110797, "learning_rate": 2.649848254931715e-05, "loss": 0.2775, "num_tokens": 878098663.0, "step": 4597 }, { "epoch": 1.5693804403481821, "grad_norm": 0.2668336544207222, "learning_rate": 2.6492159838138598e-05, "loss": 0.3269, "num_tokens": 878320653.0, "step": 4598 }, { "epoch": 1.5697217955282472, "grad_norm": 0.1805049764576971, "learning_rate": 2.6485837126960038e-05, "loss": 0.2739, "num_tokens": 878480305.0, "step": 4599 }, { "epoch": 1.570063150708312, "grad_norm": 0.23857397165681607, "learning_rate": 2.6479514415781485e-05, "loss": 0.3082, "num_tokens": 878617665.0, "step": 4600 }, { "epoch": 1.5704045058883769, "grad_norm": 0.2516079463098889, "learning_rate": 2.6473191704602935e-05, "loss": 0.2983, "num_tokens": 878816696.0, "step": 4601 }, { "epoch": 1.5707458610684417, "grad_norm": 0.22544228980524877, "learning_rate": 2.6466868993424382e-05, "loss": 0.3143, "num_tokens": 878974320.0, "step": 4602 }, { "epoch": 1.5710872162485066, "grad_norm": 0.2586061503216372, "learning_rate": 2.646054628224583e-05, "loss": 0.3363, "num_tokens": 879187524.0, "step": 4603 }, { "epoch": 1.5714285714285714, "grad_norm": 0.21465402451052082, "learning_rate": 2.6454223571067276e-05, "loss": 0.2909, "num_tokens": 879361808.0, "step": 4604 }, { "epoch": 1.5717699266086362, "grad_norm": 0.20397487974767375, "learning_rate": 2.644790085988872e-05, "loss": 0.3364, "num_tokens": 879596906.0, "step": 4605 }, { "epoch": 1.5721112817887013, "grad_norm": 0.23769010024427079, "learning_rate": 2.6441578148710167e-05, "loss": 0.3067, "num_tokens": 879759395.0, "step": 4606 }, { "epoch": 1.572452636968766, "grad_norm": 0.24046843779133087, "learning_rate": 2.6435255437531614e-05, "loss": 0.3266, "num_tokens": 879946881.0, "step": 4607 }, { "epoch": 1.572793992148831, "grad_norm": 0.23299826339786042, "learning_rate": 2.642893272635306e-05, "loss": 0.329, "num_tokens": 880133981.0, "step": 4608 }, { "epoch": 1.5731353473288956, "grad_norm": 0.24191267212843465, "learning_rate": 2.6422610015174508e-05, "loss": 0.3122, "num_tokens": 880298996.0, "step": 4609 }, { "epoch": 1.5734767025089607, "grad_norm": 0.22114829944054468, "learning_rate": 2.641628730399596e-05, "loss": 0.2983, "num_tokens": 880534849.0, "step": 4610 }, { "epoch": 1.5738180576890253, "grad_norm": 0.25397614015391995, "learning_rate": 2.64099645928174e-05, "loss": 0.2968, "num_tokens": 880719782.0, "step": 4611 }, { "epoch": 1.5741594128690903, "grad_norm": 0.24416503364340025, "learning_rate": 2.6403641881638845e-05, "loss": 0.297, "num_tokens": 880925018.0, "step": 4612 }, { "epoch": 1.5745007680491552, "grad_norm": 0.22272042027200614, "learning_rate": 2.6397319170460292e-05, "loss": 0.3425, "num_tokens": 881138192.0, "step": 4613 }, { "epoch": 1.57484212322922, "grad_norm": 0.20501566843623845, "learning_rate": 2.6390996459281743e-05, "loss": 0.2969, "num_tokens": 881325365.0, "step": 4614 }, { "epoch": 1.5751834784092849, "grad_norm": 0.20393571063089708, "learning_rate": 2.638467374810319e-05, "loss": 0.3053, "num_tokens": 881524398.0, "step": 4615 }, { "epoch": 1.5755248335893497, "grad_norm": 0.24354662395185347, "learning_rate": 2.6378351036924637e-05, "loss": 0.3172, "num_tokens": 881742144.0, "step": 4616 }, { "epoch": 1.5758661887694145, "grad_norm": 0.2305524504690414, "learning_rate": 2.637202832574608e-05, "loss": 0.3339, "num_tokens": 881900158.0, "step": 4617 }, { "epoch": 1.5762075439494794, "grad_norm": 0.20817598760650785, "learning_rate": 2.6365705614567527e-05, "loss": 0.3013, "num_tokens": 882086748.0, "step": 4618 }, { "epoch": 1.5765488991295444, "grad_norm": 0.24573417586995155, "learning_rate": 2.6359382903388974e-05, "loss": 0.3552, "num_tokens": 882264527.0, "step": 4619 }, { "epoch": 1.576890254309609, "grad_norm": 0.2313813880088019, "learning_rate": 2.635306019221042e-05, "loss": 0.3147, "num_tokens": 882453715.0, "step": 4620 }, { "epoch": 1.5772316094896741, "grad_norm": 0.20320128829392073, "learning_rate": 2.634673748103187e-05, "loss": 0.2987, "num_tokens": 882648012.0, "step": 4621 }, { "epoch": 1.5775729646697387, "grad_norm": 0.24511853437709694, "learning_rate": 2.6340414769853315e-05, "loss": 0.3345, "num_tokens": 882791382.0, "step": 4622 }, { "epoch": 1.5779143198498038, "grad_norm": 0.24592828203549308, "learning_rate": 2.633409205867476e-05, "loss": 0.3332, "num_tokens": 883011003.0, "step": 4623 }, { "epoch": 1.5782556750298686, "grad_norm": 0.2228850051478623, "learning_rate": 2.6327769347496206e-05, "loss": 0.3424, "num_tokens": 883241357.0, "step": 4624 }, { "epoch": 1.5785970302099335, "grad_norm": 0.1880912203028773, "learning_rate": 2.6321446636317653e-05, "loss": 0.2955, "num_tokens": 883408636.0, "step": 4625 }, { "epoch": 1.5789383853899983, "grad_norm": 0.22221239637989926, "learning_rate": 2.63151239251391e-05, "loss": 0.3254, "num_tokens": 883595601.0, "step": 4626 }, { "epoch": 1.5792797405700632, "grad_norm": 0.22286592367980296, "learning_rate": 2.630880121396055e-05, "loss": 0.3001, "num_tokens": 883778004.0, "step": 4627 }, { "epoch": 1.579621095750128, "grad_norm": 0.18797186472633615, "learning_rate": 2.6302478502781997e-05, "loss": 0.3076, "num_tokens": 883994670.0, "step": 4628 }, { "epoch": 1.5799624509301928, "grad_norm": 0.2132364477105189, "learning_rate": 2.6296155791603437e-05, "loss": 0.3274, "num_tokens": 884189106.0, "step": 4629 }, { "epoch": 1.5803038061102577, "grad_norm": 0.2078482219176055, "learning_rate": 2.6289833080424884e-05, "loss": 0.2884, "num_tokens": 884361523.0, "step": 4630 }, { "epoch": 1.5806451612903225, "grad_norm": 0.24880448423368287, "learning_rate": 2.6283510369246335e-05, "loss": 0.334, "num_tokens": 884548286.0, "step": 4631 }, { "epoch": 1.5809865164703876, "grad_norm": 0.21271221846808908, "learning_rate": 2.6277187658067782e-05, "loss": 0.3357, "num_tokens": 884743664.0, "step": 4632 }, { "epoch": 1.5813278716504522, "grad_norm": 0.2579521709424828, "learning_rate": 2.627086494688923e-05, "loss": 0.3061, "num_tokens": 884895148.0, "step": 4633 }, { "epoch": 1.5816692268305172, "grad_norm": 0.209977587834458, "learning_rate": 2.6264542235710676e-05, "loss": 0.3307, "num_tokens": 885084969.0, "step": 4634 }, { "epoch": 1.5820105820105819, "grad_norm": 0.26586921083413445, "learning_rate": 2.625821952453212e-05, "loss": 0.3185, "num_tokens": 885286111.0, "step": 4635 }, { "epoch": 1.582351937190647, "grad_norm": 0.20906178820947163, "learning_rate": 2.6251896813353566e-05, "loss": 0.302, "num_tokens": 885493565.0, "step": 4636 }, { "epoch": 1.5826932923707118, "grad_norm": 0.21308767624134325, "learning_rate": 2.6245574102175013e-05, "loss": 0.3096, "num_tokens": 885684201.0, "step": 4637 }, { "epoch": 1.5830346475507766, "grad_norm": 0.2121136734136316, "learning_rate": 2.623925139099646e-05, "loss": 0.3289, "num_tokens": 885871910.0, "step": 4638 }, { "epoch": 1.5833760027308414, "grad_norm": 0.2422788395918281, "learning_rate": 2.6232928679817907e-05, "loss": 0.2863, "num_tokens": 886035256.0, "step": 4639 }, { "epoch": 1.5837173579109063, "grad_norm": 0.19457768785110105, "learning_rate": 2.6226605968639358e-05, "loss": 0.281, "num_tokens": 886209969.0, "step": 4640 }, { "epoch": 1.5840587130909711, "grad_norm": 0.21221504969152596, "learning_rate": 2.6220283257460798e-05, "loss": 0.3013, "num_tokens": 886385839.0, "step": 4641 }, { "epoch": 1.584400068271036, "grad_norm": 0.22657027807833421, "learning_rate": 2.6213960546282245e-05, "loss": 0.311, "num_tokens": 886586741.0, "step": 4642 }, { "epoch": 1.584741423451101, "grad_norm": 0.21922371817793854, "learning_rate": 2.6207637835103692e-05, "loss": 0.3067, "num_tokens": 886805540.0, "step": 4643 }, { "epoch": 1.5850827786311656, "grad_norm": 0.21610931367145436, "learning_rate": 2.6201315123925142e-05, "loss": 0.3013, "num_tokens": 886990459.0, "step": 4644 }, { "epoch": 1.5854241338112307, "grad_norm": 0.20956666817968292, "learning_rate": 2.619499241274659e-05, "loss": 0.3115, "num_tokens": 887178496.0, "step": 4645 }, { "epoch": 1.5857654889912953, "grad_norm": 0.21396553792694933, "learning_rate": 2.6188669701568036e-05, "loss": 0.2819, "num_tokens": 887347031.0, "step": 4646 }, { "epoch": 1.5861068441713604, "grad_norm": 0.21280582048371358, "learning_rate": 2.6182346990389476e-05, "loss": 0.3227, "num_tokens": 887534451.0, "step": 4647 }, { "epoch": 1.586448199351425, "grad_norm": 0.22423435274922132, "learning_rate": 2.6176024279210927e-05, "loss": 0.3099, "num_tokens": 887754095.0, "step": 4648 }, { "epoch": 1.58678955453149, "grad_norm": 0.1932618659649692, "learning_rate": 2.6169701568032374e-05, "loss": 0.2982, "num_tokens": 887981952.0, "step": 4649 }, { "epoch": 1.587130909711555, "grad_norm": 0.21180215456467272, "learning_rate": 2.616337885685382e-05, "loss": 0.2848, "num_tokens": 888162674.0, "step": 4650 }, { "epoch": 1.5874722648916197, "grad_norm": 0.21307870186127198, "learning_rate": 2.6157056145675268e-05, "loss": 0.2965, "num_tokens": 888337018.0, "step": 4651 }, { "epoch": 1.5878136200716846, "grad_norm": 0.23614323792461875, "learning_rate": 2.6150733434496715e-05, "loss": 0.3106, "num_tokens": 888536126.0, "step": 4652 }, { "epoch": 1.5881549752517494, "grad_norm": 0.2187516630646418, "learning_rate": 2.6144410723318158e-05, "loss": 0.3022, "num_tokens": 888750119.0, "step": 4653 }, { "epoch": 1.5884963304318143, "grad_norm": 0.2006863373718327, "learning_rate": 2.6138088012139605e-05, "loss": 0.3052, "num_tokens": 888942724.0, "step": 4654 }, { "epoch": 1.588837685611879, "grad_norm": 0.23149343670346992, "learning_rate": 2.6131765300961052e-05, "loss": 0.2912, "num_tokens": 889136215.0, "step": 4655 }, { "epoch": 1.5891790407919442, "grad_norm": 0.20553981303196972, "learning_rate": 2.61254425897825e-05, "loss": 0.2809, "num_tokens": 889313733.0, "step": 4656 }, { "epoch": 1.5895203959720088, "grad_norm": 0.22157789347186205, "learning_rate": 2.611911987860395e-05, "loss": 0.316, "num_tokens": 889477130.0, "step": 4657 }, { "epoch": 1.5898617511520738, "grad_norm": 0.22822366067635508, "learning_rate": 2.6112797167425397e-05, "loss": 0.3209, "num_tokens": 889689203.0, "step": 4658 }, { "epoch": 1.5902031063321385, "grad_norm": 0.23318961684107736, "learning_rate": 2.6106474456246837e-05, "loss": 0.3046, "num_tokens": 889880594.0, "step": 4659 }, { "epoch": 1.5905444615122035, "grad_norm": 0.210836261341172, "learning_rate": 2.6100151745068284e-05, "loss": 0.3225, "num_tokens": 890088948.0, "step": 4660 }, { "epoch": 1.5908858166922681, "grad_norm": 0.23084138504323828, "learning_rate": 2.6093829033889734e-05, "loss": 0.2992, "num_tokens": 890258801.0, "step": 4661 }, { "epoch": 1.5912271718723332, "grad_norm": 0.20930218749975388, "learning_rate": 2.608750632271118e-05, "loss": 0.2958, "num_tokens": 890441495.0, "step": 4662 }, { "epoch": 1.591568527052398, "grad_norm": 0.21322113617070032, "learning_rate": 2.6081183611532628e-05, "loss": 0.3244, "num_tokens": 890645678.0, "step": 4663 }, { "epoch": 1.5919098822324629, "grad_norm": 0.21151696645096704, "learning_rate": 2.6074860900354075e-05, "loss": 0.3245, "num_tokens": 890884441.0, "step": 4664 }, { "epoch": 1.5922512374125277, "grad_norm": 0.20192308552560992, "learning_rate": 2.606853818917552e-05, "loss": 0.2921, "num_tokens": 891085272.0, "step": 4665 }, { "epoch": 1.5925925925925926, "grad_norm": 0.2149476212669073, "learning_rate": 2.6062215477996966e-05, "loss": 0.3015, "num_tokens": 891317871.0, "step": 4666 }, { "epoch": 1.5929339477726574, "grad_norm": 0.2131713088159877, "learning_rate": 2.6055892766818413e-05, "loss": 0.2991, "num_tokens": 891492984.0, "step": 4667 }, { "epoch": 1.5932753029527222, "grad_norm": 0.20981350778041655, "learning_rate": 2.604957005563986e-05, "loss": 0.3078, "num_tokens": 891657594.0, "step": 4668 }, { "epoch": 1.5936166581327873, "grad_norm": 0.21717975395139533, "learning_rate": 2.6043247344461307e-05, "loss": 0.3008, "num_tokens": 891889101.0, "step": 4669 }, { "epoch": 1.593958013312852, "grad_norm": 0.21732191308202142, "learning_rate": 2.6036924633282757e-05, "loss": 0.313, "num_tokens": 892075410.0, "step": 4670 }, { "epoch": 1.594299368492917, "grad_norm": 0.22439018188673485, "learning_rate": 2.6030601922104197e-05, "loss": 0.2909, "num_tokens": 892256853.0, "step": 4671 }, { "epoch": 1.5946407236729816, "grad_norm": 0.2186891810519187, "learning_rate": 2.6024279210925644e-05, "loss": 0.3288, "num_tokens": 892468647.0, "step": 4672 }, { "epoch": 1.5949820788530467, "grad_norm": 0.21970795110848335, "learning_rate": 2.601795649974709e-05, "loss": 0.2949, "num_tokens": 892630917.0, "step": 4673 }, { "epoch": 1.5953234340331115, "grad_norm": 0.22209462190966148, "learning_rate": 2.601163378856854e-05, "loss": 0.2785, "num_tokens": 892811605.0, "step": 4674 }, { "epoch": 1.5956647892131763, "grad_norm": 0.21511921167427883, "learning_rate": 2.600531107738999e-05, "loss": 0.2984, "num_tokens": 893004102.0, "step": 4675 }, { "epoch": 1.5960061443932412, "grad_norm": 0.22206536336154592, "learning_rate": 2.5998988366211435e-05, "loss": 0.319, "num_tokens": 893181396.0, "step": 4676 }, { "epoch": 1.596347499573306, "grad_norm": 0.23470428538041366, "learning_rate": 2.5992665655032876e-05, "loss": 0.3363, "num_tokens": 893399171.0, "step": 4677 }, { "epoch": 1.5966888547533709, "grad_norm": 0.21994739244535494, "learning_rate": 2.5986342943854326e-05, "loss": 0.2985, "num_tokens": 893567479.0, "step": 4678 }, { "epoch": 1.5970302099334357, "grad_norm": 0.24458552317256005, "learning_rate": 2.5980020232675773e-05, "loss": 0.2991, "num_tokens": 893735960.0, "step": 4679 }, { "epoch": 1.5973715651135008, "grad_norm": 0.2157499952167886, "learning_rate": 2.597369752149722e-05, "loss": 0.3163, "num_tokens": 893937188.0, "step": 4680 }, { "epoch": 1.5977129202935654, "grad_norm": 0.22197013776727353, "learning_rate": 2.5967374810318667e-05, "loss": 0.2815, "num_tokens": 894098108.0, "step": 4681 }, { "epoch": 1.5980542754736304, "grad_norm": 0.2173304526434984, "learning_rate": 2.5961052099140114e-05, "loss": 0.2969, "num_tokens": 894301765.0, "step": 4682 }, { "epoch": 1.598395630653695, "grad_norm": 0.19811550414602877, "learning_rate": 2.5954729387961558e-05, "loss": 0.2861, "num_tokens": 894509556.0, "step": 4683 }, { "epoch": 1.5987369858337601, "grad_norm": 0.2059377969174046, "learning_rate": 2.5948406676783005e-05, "loss": 0.3114, "num_tokens": 894687421.0, "step": 4684 }, { "epoch": 1.5990783410138247, "grad_norm": 0.2205565929769403, "learning_rate": 2.594208396560445e-05, "loss": 0.3161, "num_tokens": 894879617.0, "step": 4685 }, { "epoch": 1.5994196961938898, "grad_norm": 0.212077110492192, "learning_rate": 2.59357612544259e-05, "loss": 0.3266, "num_tokens": 895074997.0, "step": 4686 }, { "epoch": 1.5997610513739546, "grad_norm": 0.21618872496826577, "learning_rate": 2.592943854324735e-05, "loss": 0.3033, "num_tokens": 895255378.0, "step": 4687 }, { "epoch": 1.6001024065540195, "grad_norm": 0.23313674252140745, "learning_rate": 2.5923115832068796e-05, "loss": 0.2963, "num_tokens": 895468855.0, "step": 4688 }, { "epoch": 1.6004437617340843, "grad_norm": 0.219217966646151, "learning_rate": 2.5916793120890236e-05, "loss": 0.3303, "num_tokens": 895664800.0, "step": 4689 }, { "epoch": 1.6007851169141492, "grad_norm": 0.2247623016247074, "learning_rate": 2.5910470409711683e-05, "loss": 0.3179, "num_tokens": 895866543.0, "step": 4690 }, { "epoch": 1.601126472094214, "grad_norm": 0.2175705874595448, "learning_rate": 2.5904147698533133e-05, "loss": 0.3186, "num_tokens": 896082812.0, "step": 4691 }, { "epoch": 1.6014678272742788, "grad_norm": 0.23489946952872076, "learning_rate": 2.589782498735458e-05, "loss": 0.3262, "num_tokens": 896301310.0, "step": 4692 }, { "epoch": 1.601809182454344, "grad_norm": 0.21415320669785748, "learning_rate": 2.5891502276176027e-05, "loss": 0.2927, "num_tokens": 896487703.0, "step": 4693 }, { "epoch": 1.6021505376344085, "grad_norm": 0.20941892750219518, "learning_rate": 2.5885179564997474e-05, "loss": 0.3013, "num_tokens": 896661855.0, "step": 4694 }, { "epoch": 1.6024918928144736, "grad_norm": 0.21310728383819136, "learning_rate": 2.5878856853818918e-05, "loss": 0.3173, "num_tokens": 896887742.0, "step": 4695 }, { "epoch": 1.6028332479945382, "grad_norm": 0.1950916320363407, "learning_rate": 2.5872534142640365e-05, "loss": 0.298, "num_tokens": 897066801.0, "step": 4696 }, { "epoch": 1.6031746031746033, "grad_norm": 0.21687519878372383, "learning_rate": 2.5866211431461812e-05, "loss": 0.3216, "num_tokens": 897268606.0, "step": 4697 }, { "epoch": 1.6035159583546679, "grad_norm": 0.2586620008886513, "learning_rate": 2.585988872028326e-05, "loss": 0.2921, "num_tokens": 897439968.0, "step": 4698 }, { "epoch": 1.603857313534733, "grad_norm": 0.22182062140758582, "learning_rate": 2.5853566009104706e-05, "loss": 0.2942, "num_tokens": 897575465.0, "step": 4699 }, { "epoch": 1.6041986687147978, "grad_norm": 0.2350542372831141, "learning_rate": 2.5847243297926156e-05, "loss": 0.3053, "num_tokens": 897783458.0, "step": 4700 }, { "epoch": 1.6045400238948626, "grad_norm": 0.20787685851731455, "learning_rate": 2.5840920586747596e-05, "loss": 0.3087, "num_tokens": 897981956.0, "step": 4701 }, { "epoch": 1.6048813790749274, "grad_norm": 0.20736047703403576, "learning_rate": 2.5834597875569043e-05, "loss": 0.2889, "num_tokens": 898183438.0, "step": 4702 }, { "epoch": 1.6052227342549923, "grad_norm": 0.23210300064327624, "learning_rate": 2.582827516439049e-05, "loss": 0.272, "num_tokens": 898338843.0, "step": 4703 }, { "epoch": 1.6055640894350571, "grad_norm": 0.2406385812598667, "learning_rate": 2.582195245321194e-05, "loss": 0.3053, "num_tokens": 898500725.0, "step": 4704 }, { "epoch": 1.605905444615122, "grad_norm": 0.23799204535301097, "learning_rate": 2.5815629742033388e-05, "loss": 0.3227, "num_tokens": 898694201.0, "step": 4705 }, { "epoch": 1.606246799795187, "grad_norm": 0.20065573137687687, "learning_rate": 2.5809307030854828e-05, "loss": 0.307, "num_tokens": 898909139.0, "step": 4706 }, { "epoch": 1.6065881549752516, "grad_norm": 0.25308037886009643, "learning_rate": 2.5802984319676275e-05, "loss": 0.3125, "num_tokens": 899077436.0, "step": 4707 }, { "epoch": 1.6069295101553167, "grad_norm": 0.22505156727630193, "learning_rate": 2.5796661608497725e-05, "loss": 0.3175, "num_tokens": 899268885.0, "step": 4708 }, { "epoch": 1.6072708653353813, "grad_norm": 0.2156994183102407, "learning_rate": 2.5790338897319172e-05, "loss": 0.3176, "num_tokens": 899491718.0, "step": 4709 }, { "epoch": 1.6076122205154464, "grad_norm": 0.22630983183770617, "learning_rate": 2.578401618614062e-05, "loss": 0.3064, "num_tokens": 899687565.0, "step": 4710 }, { "epoch": 1.6079535756955112, "grad_norm": 0.2006906866459658, "learning_rate": 2.5777693474962066e-05, "loss": 0.2639, "num_tokens": 899878330.0, "step": 4711 }, { "epoch": 1.608294930875576, "grad_norm": 0.1990326091994501, "learning_rate": 2.577137076378351e-05, "loss": 0.3276, "num_tokens": 900077834.0, "step": 4712 }, { "epoch": 1.608636286055641, "grad_norm": 0.21949655233422533, "learning_rate": 2.5765048052604957e-05, "loss": 0.2997, "num_tokens": 900281242.0, "step": 4713 }, { "epoch": 1.6089776412357057, "grad_norm": 0.24904455047839744, "learning_rate": 2.5758725341426404e-05, "loss": 0.2926, "num_tokens": 900454418.0, "step": 4714 }, { "epoch": 1.6093189964157706, "grad_norm": 0.20360608508581482, "learning_rate": 2.575240263024785e-05, "loss": 0.3015, "num_tokens": 900644919.0, "step": 4715 }, { "epoch": 1.6096603515958354, "grad_norm": 0.2518034138560579, "learning_rate": 2.5746079919069298e-05, "loss": 0.3085, "num_tokens": 900836085.0, "step": 4716 }, { "epoch": 1.6100017067759005, "grad_norm": 0.23743610653349354, "learning_rate": 2.5739757207890748e-05, "loss": 0.311, "num_tokens": 900981750.0, "step": 4717 }, { "epoch": 1.610343061955965, "grad_norm": 0.25681232279916305, "learning_rate": 2.573343449671219e-05, "loss": 0.3231, "num_tokens": 901157791.0, "step": 4718 }, { "epoch": 1.6106844171360302, "grad_norm": 0.21507742912155198, "learning_rate": 2.5727111785533635e-05, "loss": 0.2931, "num_tokens": 901321648.0, "step": 4719 }, { "epoch": 1.6110257723160948, "grad_norm": 0.2442864901309982, "learning_rate": 2.5720789074355082e-05, "loss": 0.3151, "num_tokens": 901472331.0, "step": 4720 }, { "epoch": 1.6113671274961598, "grad_norm": 0.23838025450925204, "learning_rate": 2.5714466363176533e-05, "loss": 0.3348, "num_tokens": 901668062.0, "step": 4721 }, { "epoch": 1.6117084826762245, "grad_norm": 0.20264775056759465, "learning_rate": 2.570814365199798e-05, "loss": 0.2994, "num_tokens": 901859500.0, "step": 4722 }, { "epoch": 1.6120498378562895, "grad_norm": 0.22694870785483912, "learning_rate": 2.5701820940819427e-05, "loss": 0.3305, "num_tokens": 902096686.0, "step": 4723 }, { "epoch": 1.6123911930363544, "grad_norm": 0.20668142069188258, "learning_rate": 2.5695498229640867e-05, "loss": 0.2904, "num_tokens": 902251516.0, "step": 4724 }, { "epoch": 1.6127325482164192, "grad_norm": 0.21790845558802413, "learning_rate": 2.5689175518462317e-05, "loss": 0.3218, "num_tokens": 902488781.0, "step": 4725 }, { "epoch": 1.613073903396484, "grad_norm": 0.21235020545469463, "learning_rate": 2.5682852807283764e-05, "loss": 0.2909, "num_tokens": 902683237.0, "step": 4726 }, { "epoch": 1.6134152585765489, "grad_norm": 0.21635287653164334, "learning_rate": 2.567653009610521e-05, "loss": 0.302, "num_tokens": 902860869.0, "step": 4727 }, { "epoch": 1.6137566137566137, "grad_norm": 0.20164798857933428, "learning_rate": 2.5670207384926658e-05, "loss": 0.3201, "num_tokens": 903049719.0, "step": 4728 }, { "epoch": 1.6140979689366786, "grad_norm": 0.21671706571285718, "learning_rate": 2.5663884673748105e-05, "loss": 0.3159, "num_tokens": 903238678.0, "step": 4729 }, { "epoch": 1.6144393241167436, "grad_norm": 0.23918331594903708, "learning_rate": 2.565756196256955e-05, "loss": 0.2885, "num_tokens": 903430866.0, "step": 4730 }, { "epoch": 1.6147806792968082, "grad_norm": 0.19719679827550757, "learning_rate": 2.5651239251390996e-05, "loss": 0.3152, "num_tokens": 903603550.0, "step": 4731 }, { "epoch": 1.6151220344768733, "grad_norm": 0.2175901257335419, "learning_rate": 2.5644916540212443e-05, "loss": 0.2917, "num_tokens": 903766620.0, "step": 4732 }, { "epoch": 1.615463389656938, "grad_norm": 0.23710008867538798, "learning_rate": 2.563859382903389e-05, "loss": 0.3328, "num_tokens": 903949697.0, "step": 4733 }, { "epoch": 1.615804744837003, "grad_norm": 0.2296169286913506, "learning_rate": 2.563227111785534e-05, "loss": 0.3371, "num_tokens": 904210062.0, "step": 4734 }, { "epoch": 1.6161461000170676, "grad_norm": 0.2134382683531619, "learning_rate": 2.5625948406676787e-05, "loss": 0.3125, "num_tokens": 904381262.0, "step": 4735 }, { "epoch": 1.6164874551971327, "grad_norm": 0.22384384724536785, "learning_rate": 2.5619625695498227e-05, "loss": 0.2974, "num_tokens": 904592491.0, "step": 4736 }, { "epoch": 1.6168288103771975, "grad_norm": 0.20378962251835947, "learning_rate": 2.5613302984319674e-05, "loss": 0.2719, "num_tokens": 904762843.0, "step": 4737 }, { "epoch": 1.6171701655572623, "grad_norm": 0.1873619726635261, "learning_rate": 2.5606980273141125e-05, "loss": 0.3241, "num_tokens": 904980888.0, "step": 4738 }, { "epoch": 1.6175115207373272, "grad_norm": 0.2597414956296985, "learning_rate": 2.560065756196257e-05, "loss": 0.3175, "num_tokens": 905154158.0, "step": 4739 }, { "epoch": 1.617852875917392, "grad_norm": 0.229759601363005, "learning_rate": 2.559433485078402e-05, "loss": 0.3321, "num_tokens": 905352241.0, "step": 4740 }, { "epoch": 1.6181942310974569, "grad_norm": 0.23568546480347088, "learning_rate": 2.5588012139605466e-05, "loss": 0.3081, "num_tokens": 905552461.0, "step": 4741 }, { "epoch": 1.6185355862775217, "grad_norm": 0.24421895103272584, "learning_rate": 2.558168942842691e-05, "loss": 0.3099, "num_tokens": 905722234.0, "step": 4742 }, { "epoch": 1.6188769414575868, "grad_norm": 0.2153810614860051, "learning_rate": 2.5575366717248356e-05, "loss": 0.2867, "num_tokens": 905914122.0, "step": 4743 }, { "epoch": 1.6192182966376514, "grad_norm": 0.21912050844001255, "learning_rate": 2.5569044006069803e-05, "loss": 0.3192, "num_tokens": 906063072.0, "step": 4744 }, { "epoch": 1.6195596518177164, "grad_norm": 0.2363210646618019, "learning_rate": 2.556272129489125e-05, "loss": 0.2922, "num_tokens": 906279842.0, "step": 4745 }, { "epoch": 1.619901006997781, "grad_norm": 0.2639247007060616, "learning_rate": 2.5556398583712697e-05, "loss": 0.307, "num_tokens": 906463479.0, "step": 4746 }, { "epoch": 1.6202423621778461, "grad_norm": 0.21330475498737886, "learning_rate": 2.5550075872534148e-05, "loss": 0.338, "num_tokens": 906650899.0, "step": 4747 }, { "epoch": 1.620583717357911, "grad_norm": 0.2181213014167926, "learning_rate": 2.5543753161355588e-05, "loss": 0.3137, "num_tokens": 906880663.0, "step": 4748 }, { "epoch": 1.6209250725379758, "grad_norm": 0.21581266594250523, "learning_rate": 2.5537430450177035e-05, "loss": 0.3306, "num_tokens": 907072261.0, "step": 4749 }, { "epoch": 1.6212664277180406, "grad_norm": 0.23152867337219624, "learning_rate": 2.5531107738998482e-05, "loss": 0.2906, "num_tokens": 907296048.0, "step": 4750 }, { "epoch": 1.6216077828981055, "grad_norm": 0.20079835070093974, "learning_rate": 2.5524785027819932e-05, "loss": 0.3138, "num_tokens": 907490241.0, "step": 4751 }, { "epoch": 1.6219491380781703, "grad_norm": 0.20532462895127215, "learning_rate": 2.551846231664138e-05, "loss": 0.3217, "num_tokens": 907713520.0, "step": 4752 }, { "epoch": 1.6222904932582352, "grad_norm": 0.21246576233329825, "learning_rate": 2.5512139605462826e-05, "loss": 0.3095, "num_tokens": 907917654.0, "step": 4753 }, { "epoch": 1.6226318484383002, "grad_norm": 0.23195870790898435, "learning_rate": 2.5505816894284266e-05, "loss": 0.3122, "num_tokens": 908116186.0, "step": 4754 }, { "epoch": 1.6229732036183648, "grad_norm": 0.21149580868905074, "learning_rate": 2.5499494183105717e-05, "loss": 0.2878, "num_tokens": 908287452.0, "step": 4755 }, { "epoch": 1.62331455879843, "grad_norm": 0.2352034288567091, "learning_rate": 2.5493171471927164e-05, "loss": 0.3274, "num_tokens": 908483381.0, "step": 4756 }, { "epoch": 1.6236559139784945, "grad_norm": 0.2282913681460258, "learning_rate": 2.548684876074861e-05, "loss": 0.3069, "num_tokens": 908667981.0, "step": 4757 }, { "epoch": 1.6239972691585596, "grad_norm": 0.18181498910070737, "learning_rate": 2.5480526049570058e-05, "loss": 0.311, "num_tokens": 908891438.0, "step": 4758 }, { "epoch": 1.6243386243386242, "grad_norm": 0.21754266494757116, "learning_rate": 2.5474203338391505e-05, "loss": 0.3318, "num_tokens": 909095404.0, "step": 4759 }, { "epoch": 1.6246799795186893, "grad_norm": 0.23603472196455388, "learning_rate": 2.5467880627212948e-05, "loss": 0.2819, "num_tokens": 909265645.0, "step": 4760 }, { "epoch": 1.625021334698754, "grad_norm": 0.19513518361267831, "learning_rate": 2.5461557916034395e-05, "loss": 0.3284, "num_tokens": 909459458.0, "step": 4761 }, { "epoch": 1.625362689878819, "grad_norm": 0.22448609187401017, "learning_rate": 2.5455235204855842e-05, "loss": 0.3115, "num_tokens": 909665606.0, "step": 4762 }, { "epoch": 1.6257040450588838, "grad_norm": 0.2033051682646596, "learning_rate": 2.544891249367729e-05, "loss": 0.2877, "num_tokens": 909867099.0, "step": 4763 }, { "epoch": 1.6260454002389486, "grad_norm": 0.22510584682421142, "learning_rate": 2.544258978249874e-05, "loss": 0.3417, "num_tokens": 910069154.0, "step": 4764 }, { "epoch": 1.6263867554190135, "grad_norm": 0.22079080553543812, "learning_rate": 2.5436267071320186e-05, "loss": 0.3079, "num_tokens": 910259031.0, "step": 4765 }, { "epoch": 1.6267281105990783, "grad_norm": 0.20521314010705785, "learning_rate": 2.5429944360141627e-05, "loss": 0.2723, "num_tokens": 910444948.0, "step": 4766 }, { "epoch": 1.6270694657791434, "grad_norm": 0.210662384498049, "learning_rate": 2.5423621648963074e-05, "loss": 0.31, "num_tokens": 910656808.0, "step": 4767 }, { "epoch": 1.627410820959208, "grad_norm": 0.18487459859577654, "learning_rate": 2.5417298937784524e-05, "loss": 0.2826, "num_tokens": 910854651.0, "step": 4768 }, { "epoch": 1.627752176139273, "grad_norm": 0.22457541472698267, "learning_rate": 2.541097622660597e-05, "loss": 0.3162, "num_tokens": 911080206.0, "step": 4769 }, { "epoch": 1.6280935313193377, "grad_norm": 0.19670452052829182, "learning_rate": 2.5404653515427418e-05, "loss": 0.3021, "num_tokens": 911250129.0, "step": 4770 }, { "epoch": 1.6284348864994027, "grad_norm": 0.2156035912749524, "learning_rate": 2.5398330804248865e-05, "loss": 0.3059, "num_tokens": 911452780.0, "step": 4771 }, { "epoch": 1.6287762416794673, "grad_norm": 0.22086111129737743, "learning_rate": 2.539200809307031e-05, "loss": 0.2891, "num_tokens": 911669274.0, "step": 4772 }, { "epoch": 1.6291175968595324, "grad_norm": 0.21936225802949252, "learning_rate": 2.5385685381891756e-05, "loss": 0.2894, "num_tokens": 911840925.0, "step": 4773 }, { "epoch": 1.6294589520395972, "grad_norm": 0.2266273718517604, "learning_rate": 2.5379362670713203e-05, "loss": 0.2944, "num_tokens": 912021715.0, "step": 4774 }, { "epoch": 1.629800307219662, "grad_norm": 0.21305041983125894, "learning_rate": 2.537303995953465e-05, "loss": 0.2939, "num_tokens": 912192401.0, "step": 4775 }, { "epoch": 1.630141662399727, "grad_norm": 0.23124127530507957, "learning_rate": 2.5366717248356097e-05, "loss": 0.2923, "num_tokens": 912371419.0, "step": 4776 }, { "epoch": 1.6304830175797917, "grad_norm": 0.2454568961009799, "learning_rate": 2.5360394537177547e-05, "loss": 0.3477, "num_tokens": 912607272.0, "step": 4777 }, { "epoch": 1.6308243727598566, "grad_norm": 0.21089963104104012, "learning_rate": 2.5354071825998987e-05, "loss": 0.2967, "num_tokens": 912795243.0, "step": 4778 }, { "epoch": 1.6311657279399214, "grad_norm": 0.21113419639225414, "learning_rate": 2.5347749114820434e-05, "loss": 0.3348, "num_tokens": 913017124.0, "step": 4779 }, { "epoch": 1.6315070831199865, "grad_norm": 0.22124607818609504, "learning_rate": 2.534142640364188e-05, "loss": 0.3095, "num_tokens": 913249599.0, "step": 4780 }, { "epoch": 1.631848438300051, "grad_norm": 0.18932818040734747, "learning_rate": 2.533510369246333e-05, "loss": 0.2945, "num_tokens": 913459838.0, "step": 4781 }, { "epoch": 1.6321897934801162, "grad_norm": 0.21759029380269673, "learning_rate": 2.532878098128478e-05, "loss": 0.3139, "num_tokens": 913672016.0, "step": 4782 }, { "epoch": 1.6325311486601808, "grad_norm": 0.20228642053891366, "learning_rate": 2.5322458270106225e-05, "loss": 0.3014, "num_tokens": 913875140.0, "step": 4783 }, { "epoch": 1.6328725038402458, "grad_norm": 0.22065490511981017, "learning_rate": 2.5316135558927666e-05, "loss": 0.3438, "num_tokens": 914087609.0, "step": 4784 }, { "epoch": 1.6332138590203107, "grad_norm": 0.2695461932912406, "learning_rate": 2.5309812847749116e-05, "loss": 0.2825, "num_tokens": 914248723.0, "step": 4785 }, { "epoch": 1.6335552142003755, "grad_norm": 0.22204782643701268, "learning_rate": 2.5303490136570563e-05, "loss": 0.3122, "num_tokens": 914409961.0, "step": 4786 }, { "epoch": 1.6338965693804404, "grad_norm": 0.2125832390239225, "learning_rate": 2.529716742539201e-05, "loss": 0.2876, "num_tokens": 914604428.0, "step": 4787 }, { "epoch": 1.6342379245605052, "grad_norm": 0.23790486654550813, "learning_rate": 2.5290844714213457e-05, "loss": 0.2897, "num_tokens": 914785557.0, "step": 4788 }, { "epoch": 1.63457927974057, "grad_norm": 0.19657873799099154, "learning_rate": 2.5284522003034904e-05, "loss": 0.3066, "num_tokens": 914992116.0, "step": 4789 }, { "epoch": 1.6349206349206349, "grad_norm": 0.22387047486211786, "learning_rate": 2.5278199291856347e-05, "loss": 0.3103, "num_tokens": 915160011.0, "step": 4790 }, { "epoch": 1.6352619901006997, "grad_norm": 0.2302155623256888, "learning_rate": 2.5271876580677794e-05, "loss": 0.3352, "num_tokens": 915365246.0, "step": 4791 }, { "epoch": 1.6356033452807646, "grad_norm": 0.2142861518202605, "learning_rate": 2.526555386949924e-05, "loss": 0.3032, "num_tokens": 915554071.0, "step": 4792 }, { "epoch": 1.6359447004608296, "grad_norm": 0.23533056463716667, "learning_rate": 2.525923115832069e-05, "loss": 0.3423, "num_tokens": 915740680.0, "step": 4793 }, { "epoch": 1.6362860556408942, "grad_norm": 0.24365901631732412, "learning_rate": 2.525290844714214e-05, "loss": 0.2987, "num_tokens": 915962630.0, "step": 4794 }, { "epoch": 1.6366274108209593, "grad_norm": 0.18495585434087197, "learning_rate": 2.5246585735963586e-05, "loss": 0.2976, "num_tokens": 916150952.0, "step": 4795 }, { "epoch": 1.636968766001024, "grad_norm": 0.23932716165342013, "learning_rate": 2.5240263024785026e-05, "loss": 0.292, "num_tokens": 916347939.0, "step": 4796 }, { "epoch": 1.637310121181089, "grad_norm": 0.19209079312256436, "learning_rate": 2.5233940313606473e-05, "loss": 0.2898, "num_tokens": 916522722.0, "step": 4797 }, { "epoch": 1.6376514763611538, "grad_norm": 0.2610449042641268, "learning_rate": 2.5227617602427923e-05, "loss": 0.3055, "num_tokens": 916721729.0, "step": 4798 }, { "epoch": 1.6379928315412187, "grad_norm": 0.20542908908366142, "learning_rate": 2.522129489124937e-05, "loss": 0.2768, "num_tokens": 916885403.0, "step": 4799 }, { "epoch": 1.6383341867212835, "grad_norm": 0.21142966176054698, "learning_rate": 2.5214972180070817e-05, "loss": 0.2817, "num_tokens": 917030277.0, "step": 4800 }, { "epoch": 1.6386755419013483, "grad_norm": 0.24296393162683533, "learning_rate": 2.5208649468892264e-05, "loss": 0.3061, "num_tokens": 917200178.0, "step": 4801 }, { "epoch": 1.6390168970814132, "grad_norm": 0.2165451415697232, "learning_rate": 2.5202326757713708e-05, "loss": 0.2998, "num_tokens": 917437952.0, "step": 4802 }, { "epoch": 1.639358252261478, "grad_norm": 0.2105426189568474, "learning_rate": 2.5196004046535155e-05, "loss": 0.2893, "num_tokens": 917589533.0, "step": 4803 }, { "epoch": 1.639699607441543, "grad_norm": 0.22211591248951013, "learning_rate": 2.5189681335356602e-05, "loss": 0.3085, "num_tokens": 917777398.0, "step": 4804 }, { "epoch": 1.6400409626216077, "grad_norm": 0.24904434077047785, "learning_rate": 2.518335862417805e-05, "loss": 0.2973, "num_tokens": 917955686.0, "step": 4805 }, { "epoch": 1.6403823178016728, "grad_norm": 0.1994117966297019, "learning_rate": 2.5177035912999496e-05, "loss": 0.3061, "num_tokens": 918142570.0, "step": 4806 }, { "epoch": 1.6407236729817374, "grad_norm": 0.21616800552353835, "learning_rate": 2.5170713201820946e-05, "loss": 0.296, "num_tokens": 918309099.0, "step": 4807 }, { "epoch": 1.6410650281618024, "grad_norm": 0.19998471625919007, "learning_rate": 2.5164390490642386e-05, "loss": 0.3052, "num_tokens": 918540355.0, "step": 4808 }, { "epoch": 1.641406383341867, "grad_norm": 0.22243189034354785, "learning_rate": 2.5158067779463833e-05, "loss": 0.3477, "num_tokens": 918783487.0, "step": 4809 }, { "epoch": 1.6417477385219321, "grad_norm": 0.29387609222349986, "learning_rate": 2.515174506828528e-05, "loss": 0.3075, "num_tokens": 918962570.0, "step": 4810 }, { "epoch": 1.642089093701997, "grad_norm": 0.2535034264269777, "learning_rate": 2.514542235710673e-05, "loss": 0.3223, "num_tokens": 919191547.0, "step": 4811 }, { "epoch": 1.6424304488820618, "grad_norm": 0.22922517762089906, "learning_rate": 2.5139099645928178e-05, "loss": 0.2877, "num_tokens": 919321971.0, "step": 4812 }, { "epoch": 1.6427718040621266, "grad_norm": 0.2361283676814904, "learning_rate": 2.5132776934749625e-05, "loss": 0.3112, "num_tokens": 919484336.0, "step": 4813 }, { "epoch": 1.6431131592421915, "grad_norm": 0.21540755384829435, "learning_rate": 2.5126454223571065e-05, "loss": 0.2855, "num_tokens": 919637731.0, "step": 4814 }, { "epoch": 1.6434545144222563, "grad_norm": 0.22260207091647943, "learning_rate": 2.5120131512392515e-05, "loss": 0.3082, "num_tokens": 919853764.0, "step": 4815 }, { "epoch": 1.6437958696023212, "grad_norm": 0.2028573429944481, "learning_rate": 2.5113808801213962e-05, "loss": 0.2969, "num_tokens": 920073865.0, "step": 4816 }, { "epoch": 1.6441372247823862, "grad_norm": 0.19202817133184025, "learning_rate": 2.510748609003541e-05, "loss": 0.2753, "num_tokens": 920254046.0, "step": 4817 }, { "epoch": 1.6444785799624508, "grad_norm": 0.19937522950755807, "learning_rate": 2.5101163378856856e-05, "loss": 0.292, "num_tokens": 920458050.0, "step": 4818 }, { "epoch": 1.644819935142516, "grad_norm": 0.18937236889273026, "learning_rate": 2.5094840667678303e-05, "loss": 0.3036, "num_tokens": 920682111.0, "step": 4819 }, { "epoch": 1.6451612903225805, "grad_norm": 0.2011564474715102, "learning_rate": 2.5088517956499747e-05, "loss": 0.2957, "num_tokens": 920864101.0, "step": 4820 }, { "epoch": 1.6455026455026456, "grad_norm": 0.23751112691515225, "learning_rate": 2.5082195245321194e-05, "loss": 0.3158, "num_tokens": 921066692.0, "step": 4821 }, { "epoch": 1.6458440006827102, "grad_norm": 0.23259092817967442, "learning_rate": 2.507587253414264e-05, "loss": 0.3049, "num_tokens": 921257180.0, "step": 4822 }, { "epoch": 1.6461853558627753, "grad_norm": 0.20967181692777626, "learning_rate": 2.5069549822964088e-05, "loss": 0.3146, "num_tokens": 921470909.0, "step": 4823 }, { "epoch": 1.64652671104284, "grad_norm": 0.230775163122175, "learning_rate": 2.5063227111785538e-05, "loss": 0.3008, "num_tokens": 921663474.0, "step": 4824 }, { "epoch": 1.646868066222905, "grad_norm": 0.20534754944610253, "learning_rate": 2.5056904400606985e-05, "loss": 0.3048, "num_tokens": 921843250.0, "step": 4825 }, { "epoch": 1.6472094214029698, "grad_norm": 0.2398921166117168, "learning_rate": 2.5050581689428425e-05, "loss": 0.3135, "num_tokens": 922022784.0, "step": 4826 }, { "epoch": 1.6475507765830346, "grad_norm": 0.2252374242722889, "learning_rate": 2.5044258978249872e-05, "loss": 0.3016, "num_tokens": 922234019.0, "step": 4827 }, { "epoch": 1.6478921317630995, "grad_norm": 0.2540303492483057, "learning_rate": 2.5037936267071323e-05, "loss": 0.3127, "num_tokens": 922383267.0, "step": 4828 }, { "epoch": 1.6482334869431643, "grad_norm": 0.22735576658365172, "learning_rate": 2.503161355589277e-05, "loss": 0.2784, "num_tokens": 922550567.0, "step": 4829 }, { "epoch": 1.6485748421232294, "grad_norm": 0.21050415271247253, "learning_rate": 2.5025290844714217e-05, "loss": 0.3048, "num_tokens": 922753287.0, "step": 4830 }, { "epoch": 1.648916197303294, "grad_norm": 0.5584535081463623, "learning_rate": 2.5018968133535664e-05, "loss": 0.3528, "num_tokens": 922957242.0, "step": 4831 }, { "epoch": 1.649257552483359, "grad_norm": 0.21946095194973014, "learning_rate": 2.5012645422357107e-05, "loss": 0.2914, "num_tokens": 923133045.0, "step": 4832 }, { "epoch": 1.6495989076634237, "grad_norm": 0.24322184987307433, "learning_rate": 2.5006322711178554e-05, "loss": 0.2964, "num_tokens": 923300908.0, "step": 4833 }, { "epoch": 1.6499402628434887, "grad_norm": 0.24540254475798226, "learning_rate": 2.5e-05, "loss": 0.3195, "num_tokens": 923505093.0, "step": 4834 }, { "epoch": 1.6502816180235536, "grad_norm": 0.23201077027607017, "learning_rate": 2.4993677288821448e-05, "loss": 0.3156, "num_tokens": 923682800.0, "step": 4835 }, { "epoch": 1.6506229732036184, "grad_norm": 0.22021827464357516, "learning_rate": 2.4987354577642895e-05, "loss": 0.3083, "num_tokens": 923847885.0, "step": 4836 }, { "epoch": 1.6509643283836832, "grad_norm": 0.22978591711720925, "learning_rate": 2.4981031866464342e-05, "loss": 0.3217, "num_tokens": 923998825.0, "step": 4837 }, { "epoch": 1.651305683563748, "grad_norm": 0.24824549748959662, "learning_rate": 2.497470915528579e-05, "loss": 0.2832, "num_tokens": 924205827.0, "step": 4838 }, { "epoch": 1.651647038743813, "grad_norm": 0.2265347091561004, "learning_rate": 2.4968386444107233e-05, "loss": 0.3308, "num_tokens": 924470832.0, "step": 4839 }, { "epoch": 1.6519883939238778, "grad_norm": 0.17616885017794023, "learning_rate": 2.496206373292868e-05, "loss": 0.3048, "num_tokens": 924670627.0, "step": 4840 }, { "epoch": 1.6523297491039428, "grad_norm": 0.22955492375202866, "learning_rate": 2.495574102175013e-05, "loss": 0.2821, "num_tokens": 924862328.0, "step": 4841 }, { "epoch": 1.6526711042840074, "grad_norm": 0.2007604250148318, "learning_rate": 2.4949418310571574e-05, "loss": 0.3038, "num_tokens": 925054337.0, "step": 4842 }, { "epoch": 1.6530124594640725, "grad_norm": 0.2111808321368695, "learning_rate": 2.494309559939302e-05, "loss": 0.2824, "num_tokens": 925259840.0, "step": 4843 }, { "epoch": 1.653353814644137, "grad_norm": 0.21576043683735097, "learning_rate": 2.4936772888214468e-05, "loss": 0.3228, "num_tokens": 925499691.0, "step": 4844 }, { "epoch": 1.6536951698242022, "grad_norm": 0.19647890817225802, "learning_rate": 2.4930450177035915e-05, "loss": 0.281, "num_tokens": 925679373.0, "step": 4845 }, { "epoch": 1.6540365250042668, "grad_norm": 0.23439434327412, "learning_rate": 2.492412746585736e-05, "loss": 0.3011, "num_tokens": 925838083.0, "step": 4846 }, { "epoch": 1.6543778801843319, "grad_norm": 0.21424081549997465, "learning_rate": 2.491780475467881e-05, "loss": 0.3139, "num_tokens": 926032878.0, "step": 4847 }, { "epoch": 1.6547192353643967, "grad_norm": 0.2418527704278789, "learning_rate": 2.4911482043500252e-05, "loss": 0.3105, "num_tokens": 926200986.0, "step": 4848 }, { "epoch": 1.6550605905444615, "grad_norm": 0.22890056138140952, "learning_rate": 2.49051593323217e-05, "loss": 0.3221, "num_tokens": 926421889.0, "step": 4849 }, { "epoch": 1.6554019457245264, "grad_norm": 0.20648487835535972, "learning_rate": 2.489883662114315e-05, "loss": 0.3004, "num_tokens": 926620937.0, "step": 4850 }, { "epoch": 1.6557433009045912, "grad_norm": 0.2079798977709343, "learning_rate": 2.4892513909964593e-05, "loss": 0.286, "num_tokens": 926830830.0, "step": 4851 }, { "epoch": 1.656084656084656, "grad_norm": 0.21283808571221133, "learning_rate": 2.488619119878604e-05, "loss": 0.2986, "num_tokens": 927063612.0, "step": 4852 }, { "epoch": 1.6564260112647209, "grad_norm": 0.18973706923198527, "learning_rate": 2.4879868487607487e-05, "loss": 0.3051, "num_tokens": 927257213.0, "step": 4853 }, { "epoch": 1.656767366444786, "grad_norm": 0.23000041294337323, "learning_rate": 2.4873545776428934e-05, "loss": 0.3223, "num_tokens": 927417587.0, "step": 4854 }, { "epoch": 1.6571087216248506, "grad_norm": 0.2427458820142838, "learning_rate": 2.486722306525038e-05, "loss": 0.2998, "num_tokens": 927625552.0, "step": 4855 }, { "epoch": 1.6574500768049156, "grad_norm": 0.21533605923466145, "learning_rate": 2.4860900354071828e-05, "loss": 0.2768, "num_tokens": 927769496.0, "step": 4856 }, { "epoch": 1.6577914319849802, "grad_norm": 0.2102314438773368, "learning_rate": 2.485457764289327e-05, "loss": 0.3371, "num_tokens": 927995626.0, "step": 4857 }, { "epoch": 1.6581327871650453, "grad_norm": 0.2221427831024127, "learning_rate": 2.4848254931714722e-05, "loss": 0.2904, "num_tokens": 928183462.0, "step": 4858 }, { "epoch": 1.65847414234511, "grad_norm": 0.21942672994834125, "learning_rate": 2.484193222053617e-05, "loss": 0.3088, "num_tokens": 928384090.0, "step": 4859 }, { "epoch": 1.658815497525175, "grad_norm": 0.22942630746938186, "learning_rate": 2.4835609509357613e-05, "loss": 0.2843, "num_tokens": 928553735.0, "step": 4860 }, { "epoch": 1.6591568527052398, "grad_norm": 0.21347457774772458, "learning_rate": 2.482928679817906e-05, "loss": 0.2762, "num_tokens": 928723809.0, "step": 4861 }, { "epoch": 1.6594982078853047, "grad_norm": 0.2447704880211494, "learning_rate": 2.4822964087000507e-05, "loss": 0.2733, "num_tokens": 928872130.0, "step": 4862 }, { "epoch": 1.6598395630653695, "grad_norm": 0.2143505425491116, "learning_rate": 2.4816641375821954e-05, "loss": 0.3221, "num_tokens": 929036850.0, "step": 4863 }, { "epoch": 1.6601809182454343, "grad_norm": 0.2481779849528392, "learning_rate": 2.48103186646434e-05, "loss": 0.2876, "num_tokens": 929217423.0, "step": 4864 }, { "epoch": 1.6605222734254992, "grad_norm": 0.1717183169391493, "learning_rate": 2.4803995953464848e-05, "loss": 0.3002, "num_tokens": 929454641.0, "step": 4865 }, { "epoch": 1.660863628605564, "grad_norm": 0.2228598549436969, "learning_rate": 2.4797673242286294e-05, "loss": 0.3293, "num_tokens": 929664435.0, "step": 4866 }, { "epoch": 1.661204983785629, "grad_norm": 0.2396932267952953, "learning_rate": 2.479135053110774e-05, "loss": 0.3394, "num_tokens": 929872393.0, "step": 4867 }, { "epoch": 1.6615463389656937, "grad_norm": 0.21685842468784308, "learning_rate": 2.478502781992919e-05, "loss": 0.3144, "num_tokens": 930039604.0, "step": 4868 }, { "epoch": 1.6618876941457588, "grad_norm": 0.23590029551062452, "learning_rate": 2.4778705108750632e-05, "loss": 0.3134, "num_tokens": 930213800.0, "step": 4869 }, { "epoch": 1.6622290493258234, "grad_norm": 0.2201448023897793, "learning_rate": 2.477238239757208e-05, "loss": 0.2973, "num_tokens": 930391547.0, "step": 4870 }, { "epoch": 1.6625704045058884, "grad_norm": 0.21673093259782375, "learning_rate": 2.476605968639353e-05, "loss": 0.2953, "num_tokens": 930556014.0, "step": 4871 }, { "epoch": 1.6629117596859533, "grad_norm": 0.19159453533196263, "learning_rate": 2.4759736975214973e-05, "loss": 0.2924, "num_tokens": 930750330.0, "step": 4872 }, { "epoch": 1.6632531148660181, "grad_norm": 0.23000611694050854, "learning_rate": 2.475341426403642e-05, "loss": 0.299, "num_tokens": 930925065.0, "step": 4873 }, { "epoch": 1.663594470046083, "grad_norm": 0.2103314162123871, "learning_rate": 2.4747091552857867e-05, "loss": 0.31, "num_tokens": 931111429.0, "step": 4874 }, { "epoch": 1.6639358252261478, "grad_norm": 0.21575607960491908, "learning_rate": 2.4740768841679314e-05, "loss": 0.2907, "num_tokens": 931302782.0, "step": 4875 }, { "epoch": 1.6642771804062126, "grad_norm": 0.2018885777912645, "learning_rate": 2.473444613050076e-05, "loss": 0.2788, "num_tokens": 931477182.0, "step": 4876 }, { "epoch": 1.6646185355862775, "grad_norm": 0.22094977152085754, "learning_rate": 2.4728123419322208e-05, "loss": 0.3023, "num_tokens": 931667777.0, "step": 4877 }, { "epoch": 1.6649598907663425, "grad_norm": 0.22107031513414602, "learning_rate": 2.472180070814365e-05, "loss": 0.2831, "num_tokens": 931865748.0, "step": 4878 }, { "epoch": 1.6653012459464072, "grad_norm": 0.2237685734801217, "learning_rate": 2.47154779969651e-05, "loss": 0.2906, "num_tokens": 932040205.0, "step": 4879 }, { "epoch": 1.6656426011264722, "grad_norm": 0.23610847610138644, "learning_rate": 2.470915528578655e-05, "loss": 0.3024, "num_tokens": 932213064.0, "step": 4880 }, { "epoch": 1.6659839563065368, "grad_norm": 0.22877140912591648, "learning_rate": 2.4702832574607992e-05, "loss": 0.3107, "num_tokens": 932423914.0, "step": 4881 }, { "epoch": 1.666325311486602, "grad_norm": 0.2292341629481244, "learning_rate": 2.469650986342944e-05, "loss": 0.3131, "num_tokens": 932601084.0, "step": 4882 }, { "epoch": 1.6666666666666665, "grad_norm": 0.22695237867354548, "learning_rate": 2.4690187152250886e-05, "loss": 0.295, "num_tokens": 932778614.0, "step": 4883 }, { "epoch": 1.6670080218467316, "grad_norm": 0.2505165645007201, "learning_rate": 2.4683864441072333e-05, "loss": 0.3387, "num_tokens": 932963045.0, "step": 4884 }, { "epoch": 1.6673493770267964, "grad_norm": 0.2311288828690131, "learning_rate": 2.467754172989378e-05, "loss": 0.3164, "num_tokens": 933173137.0, "step": 4885 }, { "epoch": 1.6676907322068613, "grad_norm": 0.21587616986600164, "learning_rate": 2.4671219018715227e-05, "loss": 0.289, "num_tokens": 933342779.0, "step": 4886 }, { "epoch": 1.668032087386926, "grad_norm": 0.22355443665162197, "learning_rate": 2.466489630753667e-05, "loss": 0.3157, "num_tokens": 933548940.0, "step": 4887 }, { "epoch": 1.668373442566991, "grad_norm": 0.24027700602893934, "learning_rate": 2.465857359635812e-05, "loss": 0.2859, "num_tokens": 933699199.0, "step": 4888 }, { "epoch": 1.6687147977470558, "grad_norm": 0.24342518672882035, "learning_rate": 2.465225088517957e-05, "loss": 0.3158, "num_tokens": 933880303.0, "step": 4889 }, { "epoch": 1.6690561529271206, "grad_norm": 0.2136225755683353, "learning_rate": 2.4645928174001012e-05, "loss": 0.2904, "num_tokens": 934043285.0, "step": 4890 }, { "epoch": 1.6693975081071857, "grad_norm": 0.2514068355550002, "learning_rate": 2.463960546282246e-05, "loss": 0.2928, "num_tokens": 934184085.0, "step": 4891 }, { "epoch": 1.6697388632872503, "grad_norm": 0.21727979317721474, "learning_rate": 2.4633282751643906e-05, "loss": 0.315, "num_tokens": 934426573.0, "step": 4892 }, { "epoch": 1.6700802184673154, "grad_norm": 0.20556091947388758, "learning_rate": 2.4626960040465353e-05, "loss": 0.3, "num_tokens": 934603691.0, "step": 4893 }, { "epoch": 1.67042157364738, "grad_norm": 0.241105748857299, "learning_rate": 2.46206373292868e-05, "loss": 0.3095, "num_tokens": 934773326.0, "step": 4894 }, { "epoch": 1.670762928827445, "grad_norm": 0.23254489042233237, "learning_rate": 2.4614314618108247e-05, "loss": 0.2945, "num_tokens": 934945700.0, "step": 4895 }, { "epoch": 1.6711042840075097, "grad_norm": 0.21180804330581388, "learning_rate": 2.4607991906929694e-05, "loss": 0.2708, "num_tokens": 935094757.0, "step": 4896 }, { "epoch": 1.6714456391875747, "grad_norm": 0.22959686254785583, "learning_rate": 2.460166919575114e-05, "loss": 0.2633, "num_tokens": 935258853.0, "step": 4897 }, { "epoch": 1.6717869943676396, "grad_norm": 0.2228976470410399, "learning_rate": 2.4595346484572588e-05, "loss": 0.312, "num_tokens": 935423989.0, "step": 4898 }, { "epoch": 1.6721283495477044, "grad_norm": 0.23726689011295996, "learning_rate": 2.458902377339403e-05, "loss": 0.3053, "num_tokens": 935625086.0, "step": 4899 }, { "epoch": 1.6724697047277692, "grad_norm": 0.23138150802162408, "learning_rate": 2.458270106221548e-05, "loss": 0.3184, "num_tokens": 935874553.0, "step": 4900 }, { "epoch": 1.672811059907834, "grad_norm": 0.21618701833728968, "learning_rate": 2.4576378351036925e-05, "loss": 0.314, "num_tokens": 936071095.0, "step": 4901 }, { "epoch": 1.673152415087899, "grad_norm": 0.22928188155734008, "learning_rate": 2.4570055639858372e-05, "loss": 0.2915, "num_tokens": 936227236.0, "step": 4902 }, { "epoch": 1.6734937702679638, "grad_norm": 0.2207388754311965, "learning_rate": 2.456373292867982e-05, "loss": 0.303, "num_tokens": 936436182.0, "step": 4903 }, { "epoch": 1.6738351254480288, "grad_norm": 0.22248670073128404, "learning_rate": 2.4557410217501263e-05, "loss": 0.3092, "num_tokens": 936619028.0, "step": 4904 }, { "epoch": 1.6741764806280934, "grad_norm": 0.1900010413389877, "learning_rate": 2.4551087506322713e-05, "loss": 0.3076, "num_tokens": 936841280.0, "step": 4905 }, { "epoch": 1.6745178358081585, "grad_norm": 0.2183191477809641, "learning_rate": 2.454476479514416e-05, "loss": 0.3103, "num_tokens": 937051589.0, "step": 4906 }, { "epoch": 1.6748591909882231, "grad_norm": 0.20264113943354906, "learning_rate": 2.4538442083965604e-05, "loss": 0.2918, "num_tokens": 937234173.0, "step": 4907 }, { "epoch": 1.6752005461682882, "grad_norm": 0.21170119833666545, "learning_rate": 2.453211937278705e-05, "loss": 0.3173, "num_tokens": 937430669.0, "step": 4908 }, { "epoch": 1.675541901348353, "grad_norm": 0.24442995560603592, "learning_rate": 2.4525796661608498e-05, "loss": 0.33, "num_tokens": 937638775.0, "step": 4909 }, { "epoch": 1.6758832565284179, "grad_norm": 0.1878098481294942, "learning_rate": 2.4519473950429945e-05, "loss": 0.3025, "num_tokens": 937855708.0, "step": 4910 }, { "epoch": 1.6762246117084827, "grad_norm": 0.19659835490747996, "learning_rate": 2.4513151239251392e-05, "loss": 0.2974, "num_tokens": 938030823.0, "step": 4911 }, { "epoch": 1.6765659668885475, "grad_norm": 0.2124912387936192, "learning_rate": 2.450682852807284e-05, "loss": 0.2976, "num_tokens": 938219471.0, "step": 4912 }, { "epoch": 1.6769073220686124, "grad_norm": 0.21227041378674702, "learning_rate": 2.4500505816894286e-05, "loss": 0.3006, "num_tokens": 938442899.0, "step": 4913 }, { "epoch": 1.6772486772486772, "grad_norm": 0.20592950262300386, "learning_rate": 2.4494183105715733e-05, "loss": 0.2895, "num_tokens": 938628290.0, "step": 4914 }, { "epoch": 1.6775900324287423, "grad_norm": 0.20885507770510697, "learning_rate": 2.448786039453718e-05, "loss": 0.2983, "num_tokens": 938810589.0, "step": 4915 }, { "epoch": 1.677931387608807, "grad_norm": 0.2238334929502262, "learning_rate": 2.4481537683358623e-05, "loss": 0.291, "num_tokens": 938963947.0, "step": 4916 }, { "epoch": 1.678272742788872, "grad_norm": 0.19695063872018403, "learning_rate": 2.447521497218007e-05, "loss": 0.3094, "num_tokens": 939167256.0, "step": 4917 }, { "epoch": 1.6786140979689366, "grad_norm": 0.208710204893791, "learning_rate": 2.446889226100152e-05, "loss": 0.2976, "num_tokens": 939333031.0, "step": 4918 }, { "epoch": 1.6789554531490016, "grad_norm": 0.2322140615755462, "learning_rate": 2.4462569549822964e-05, "loss": 0.3174, "num_tokens": 939506615.0, "step": 4919 }, { "epoch": 1.6792968083290662, "grad_norm": 0.2433326646774139, "learning_rate": 2.445624683864441e-05, "loss": 0.3253, "num_tokens": 939704518.0, "step": 4920 }, { "epoch": 1.6796381635091313, "grad_norm": 0.22044520822526445, "learning_rate": 2.4449924127465858e-05, "loss": 0.2819, "num_tokens": 939917458.0, "step": 4921 }, { "epoch": 1.6799795186891961, "grad_norm": 0.2135576327587405, "learning_rate": 2.4443601416287305e-05, "loss": 0.2659, "num_tokens": 940058651.0, "step": 4922 }, { "epoch": 1.680320873869261, "grad_norm": 0.23878286172129554, "learning_rate": 2.4437278705108752e-05, "loss": 0.3498, "num_tokens": 940292595.0, "step": 4923 }, { "epoch": 1.6806622290493258, "grad_norm": 0.23110141922014396, "learning_rate": 2.44309559939302e-05, "loss": 0.2916, "num_tokens": 940464978.0, "step": 4924 }, { "epoch": 1.6810035842293907, "grad_norm": 0.253010802584972, "learning_rate": 2.4424633282751643e-05, "loss": 0.3084, "num_tokens": 940668307.0, "step": 4925 }, { "epoch": 1.6813449394094555, "grad_norm": 0.20377427814142518, "learning_rate": 2.441831057157309e-05, "loss": 0.2794, "num_tokens": 940843847.0, "step": 4926 }, { "epoch": 1.6816862945895203, "grad_norm": 0.22577197908545002, "learning_rate": 2.441198786039454e-05, "loss": 0.3337, "num_tokens": 941014023.0, "step": 4927 }, { "epoch": 1.6820276497695854, "grad_norm": 0.20773870163084246, "learning_rate": 2.4405665149215984e-05, "loss": 0.308, "num_tokens": 941200751.0, "step": 4928 }, { "epoch": 1.68236900494965, "grad_norm": 0.2149184435267815, "learning_rate": 2.439934243803743e-05, "loss": 0.2865, "num_tokens": 941389499.0, "step": 4929 }, { "epoch": 1.682710360129715, "grad_norm": 0.22826166636562745, "learning_rate": 2.4393019726858878e-05, "loss": 0.3073, "num_tokens": 941588214.0, "step": 4930 }, { "epoch": 1.6830517153097797, "grad_norm": 0.17696171322266752, "learning_rate": 2.4386697015680325e-05, "loss": 0.2843, "num_tokens": 941839308.0, "step": 4931 }, { "epoch": 1.6833930704898448, "grad_norm": 0.21193835834414806, "learning_rate": 2.438037430450177e-05, "loss": 0.2814, "num_tokens": 942014263.0, "step": 4932 }, { "epoch": 1.6837344256699094, "grad_norm": 0.23334657341189652, "learning_rate": 2.437405159332322e-05, "loss": 0.2993, "num_tokens": 942159856.0, "step": 4933 }, { "epoch": 1.6840757808499744, "grad_norm": 0.2344940702663223, "learning_rate": 2.4367728882144662e-05, "loss": 0.2963, "num_tokens": 942335382.0, "step": 4934 }, { "epoch": 1.6844171360300393, "grad_norm": 0.2132289684395119, "learning_rate": 2.4361406170966113e-05, "loss": 0.2942, "num_tokens": 942541515.0, "step": 4935 }, { "epoch": 1.6847584912101041, "grad_norm": 0.2186786840800353, "learning_rate": 2.435508345978756e-05, "loss": 0.349, "num_tokens": 942749447.0, "step": 4936 }, { "epoch": 1.685099846390169, "grad_norm": 0.22207842187103194, "learning_rate": 2.4348760748609003e-05, "loss": 0.2955, "num_tokens": 942930147.0, "step": 4937 }, { "epoch": 1.6854412015702338, "grad_norm": 0.2360838213794174, "learning_rate": 2.434243803743045e-05, "loss": 0.3326, "num_tokens": 943108477.0, "step": 4938 }, { "epoch": 1.6857825567502986, "grad_norm": 0.23807800382638317, "learning_rate": 2.4336115326251897e-05, "loss": 0.2783, "num_tokens": 943306080.0, "step": 4939 }, { "epoch": 1.6861239119303635, "grad_norm": 0.18305370619862674, "learning_rate": 2.4329792615073344e-05, "loss": 0.2763, "num_tokens": 943487474.0, "step": 4940 }, { "epoch": 1.6864652671104285, "grad_norm": 0.22358368217587388, "learning_rate": 2.432346990389479e-05, "loss": 0.2812, "num_tokens": 943699493.0, "step": 4941 }, { "epoch": 1.6868066222904932, "grad_norm": 0.2222491640904063, "learning_rate": 2.4317147192716238e-05, "loss": 0.3359, "num_tokens": 943928104.0, "step": 4942 }, { "epoch": 1.6871479774705582, "grad_norm": 0.21956063162517267, "learning_rate": 2.4310824481537685e-05, "loss": 0.321, "num_tokens": 944114376.0, "step": 4943 }, { "epoch": 1.6874893326506228, "grad_norm": 0.22420015677542351, "learning_rate": 2.4304501770359132e-05, "loss": 0.3078, "num_tokens": 944314210.0, "step": 4944 }, { "epoch": 1.687830687830688, "grad_norm": 0.21725695438022274, "learning_rate": 2.429817905918058e-05, "loss": 0.2907, "num_tokens": 944560519.0, "step": 4945 }, { "epoch": 1.6881720430107527, "grad_norm": 0.20695234719358968, "learning_rate": 2.4291856348002023e-05, "loss": 0.3108, "num_tokens": 944768292.0, "step": 4946 }, { "epoch": 1.6885133981908176, "grad_norm": 0.1915358528977743, "learning_rate": 2.428553363682347e-05, "loss": 0.2885, "num_tokens": 944947516.0, "step": 4947 }, { "epoch": 1.6888547533708824, "grad_norm": 0.2230547215802576, "learning_rate": 2.427921092564492e-05, "loss": 0.291, "num_tokens": 945119353.0, "step": 4948 }, { "epoch": 1.6891961085509473, "grad_norm": 0.20990355560281607, "learning_rate": 2.4272888214466364e-05, "loss": 0.2929, "num_tokens": 945317919.0, "step": 4949 }, { "epoch": 1.689537463731012, "grad_norm": 0.2050883333103357, "learning_rate": 2.426656550328781e-05, "loss": 0.3018, "num_tokens": 945517024.0, "step": 4950 }, { "epoch": 1.689878818911077, "grad_norm": 0.2327395205002521, "learning_rate": 2.4260242792109258e-05, "loss": 0.2925, "num_tokens": 945699669.0, "step": 4951 }, { "epoch": 1.6902201740911418, "grad_norm": 0.20082761118774142, "learning_rate": 2.4253920080930705e-05, "loss": 0.3129, "num_tokens": 945924628.0, "step": 4952 }, { "epoch": 1.6905615292712066, "grad_norm": 0.2294523437290344, "learning_rate": 2.424759736975215e-05, "loss": 0.301, "num_tokens": 946107734.0, "step": 4953 }, { "epoch": 1.6909028844512717, "grad_norm": 0.1945754027431144, "learning_rate": 2.42412746585736e-05, "loss": 0.2948, "num_tokens": 946284835.0, "step": 4954 }, { "epoch": 1.6912442396313363, "grad_norm": 0.21036165039022436, "learning_rate": 2.4234951947395042e-05, "loss": 0.2801, "num_tokens": 946476314.0, "step": 4955 }, { "epoch": 1.6915855948114014, "grad_norm": 0.24195569202064948, "learning_rate": 2.422862923621649e-05, "loss": 0.3337, "num_tokens": 946645381.0, "step": 4956 }, { "epoch": 1.691926949991466, "grad_norm": 0.2459642836784043, "learning_rate": 2.422230652503794e-05, "loss": 0.3116, "num_tokens": 946845007.0, "step": 4957 }, { "epoch": 1.692268305171531, "grad_norm": 0.18574914877896062, "learning_rate": 2.4215983813859383e-05, "loss": 0.3342, "num_tokens": 947067835.0, "step": 4958 }, { "epoch": 1.6926096603515959, "grad_norm": 0.24409049651463047, "learning_rate": 2.420966110268083e-05, "loss": 0.3054, "num_tokens": 947237403.0, "step": 4959 }, { "epoch": 1.6929510155316607, "grad_norm": 0.5528772753280444, "learning_rate": 2.4203338391502277e-05, "loss": 0.2945, "num_tokens": 947443678.0, "step": 4960 }, { "epoch": 1.6932923707117256, "grad_norm": 0.20616283825662787, "learning_rate": 2.4197015680323724e-05, "loss": 0.2787, "num_tokens": 947591195.0, "step": 4961 }, { "epoch": 1.6936337258917904, "grad_norm": 0.24949288388933358, "learning_rate": 2.419069296914517e-05, "loss": 0.2808, "num_tokens": 947767539.0, "step": 4962 }, { "epoch": 1.6939750810718552, "grad_norm": 0.21337058168523546, "learning_rate": 2.4184370257966618e-05, "loss": 0.2815, "num_tokens": 947955758.0, "step": 4963 }, { "epoch": 1.69431643625192, "grad_norm": 0.2066970348108671, "learning_rate": 2.417804754678806e-05, "loss": 0.3358, "num_tokens": 948139264.0, "step": 4964 }, { "epoch": 1.6946577914319851, "grad_norm": 0.22981586393800105, "learning_rate": 2.4171724835609512e-05, "loss": 0.3228, "num_tokens": 948315526.0, "step": 4965 }, { "epoch": 1.6949991466120498, "grad_norm": 0.2624039814916712, "learning_rate": 2.416540212443096e-05, "loss": 0.2891, "num_tokens": 948486216.0, "step": 4966 }, { "epoch": 1.6953405017921148, "grad_norm": 0.27272298257208505, "learning_rate": 2.4159079413252403e-05, "loss": 0.2913, "num_tokens": 948667852.0, "step": 4967 }, { "epoch": 1.6956818569721794, "grad_norm": 0.21162849323200655, "learning_rate": 2.415275670207385e-05, "loss": 0.3137, "num_tokens": 948854770.0, "step": 4968 }, { "epoch": 1.6960232121522445, "grad_norm": 0.2395627645211665, "learning_rate": 2.4146433990895296e-05, "loss": 0.3117, "num_tokens": 949030100.0, "step": 4969 }, { "epoch": 1.6963645673323091, "grad_norm": 0.21929438496279255, "learning_rate": 2.4140111279716743e-05, "loss": 0.2887, "num_tokens": 949238451.0, "step": 4970 }, { "epoch": 1.6967059225123742, "grad_norm": 0.22418817202731808, "learning_rate": 2.413378856853819e-05, "loss": 0.3134, "num_tokens": 949439699.0, "step": 4971 }, { "epoch": 1.697047277692439, "grad_norm": 0.2234866989564279, "learning_rate": 2.4127465857359637e-05, "loss": 0.3083, "num_tokens": 949635878.0, "step": 4972 }, { "epoch": 1.6973886328725039, "grad_norm": 0.2181999793743592, "learning_rate": 2.4121143146181084e-05, "loss": 0.3178, "num_tokens": 949841407.0, "step": 4973 }, { "epoch": 1.6977299880525687, "grad_norm": 0.22857085541362213, "learning_rate": 2.411482043500253e-05, "loss": 0.2957, "num_tokens": 950040659.0, "step": 4974 }, { "epoch": 1.6980713432326335, "grad_norm": 0.2201763417465958, "learning_rate": 2.410849772382398e-05, "loss": 0.2926, "num_tokens": 950249976.0, "step": 4975 }, { "epoch": 1.6984126984126984, "grad_norm": 0.20629193005305835, "learning_rate": 2.4102175012645422e-05, "loss": 0.3219, "num_tokens": 950444107.0, "step": 4976 }, { "epoch": 1.6987540535927632, "grad_norm": 0.21426881786397667, "learning_rate": 2.409585230146687e-05, "loss": 0.296, "num_tokens": 950621691.0, "step": 4977 }, { "epoch": 1.6990954087728283, "grad_norm": 0.2219854526385435, "learning_rate": 2.408952959028832e-05, "loss": 0.3002, "num_tokens": 950814829.0, "step": 4978 }, { "epoch": 1.699436763952893, "grad_norm": 0.2277454068973936, "learning_rate": 2.4083206879109763e-05, "loss": 0.2902, "num_tokens": 950988333.0, "step": 4979 }, { "epoch": 1.699778119132958, "grad_norm": 0.2066407966063378, "learning_rate": 2.407688416793121e-05, "loss": 0.3178, "num_tokens": 951152336.0, "step": 4980 }, { "epoch": 1.7001194743130226, "grad_norm": 0.23455675242664992, "learning_rate": 2.4070561456752657e-05, "loss": 0.3174, "num_tokens": 951360324.0, "step": 4981 }, { "epoch": 1.7004608294930876, "grad_norm": 0.21065469599295927, "learning_rate": 2.4064238745574104e-05, "loss": 0.3162, "num_tokens": 951563705.0, "step": 4982 }, { "epoch": 1.7008021846731523, "grad_norm": 0.20936687088713254, "learning_rate": 2.405791603439555e-05, "loss": 0.3229, "num_tokens": 951765182.0, "step": 4983 }, { "epoch": 1.7011435398532173, "grad_norm": 0.23722543641665048, "learning_rate": 2.4051593323216998e-05, "loss": 0.3156, "num_tokens": 951944420.0, "step": 4984 }, { "epoch": 1.7014848950332822, "grad_norm": 0.22453254000096268, "learning_rate": 2.404527061203844e-05, "loss": 0.311, "num_tokens": 952151353.0, "step": 4985 }, { "epoch": 1.701826250213347, "grad_norm": 0.20874148600086884, "learning_rate": 2.403894790085989e-05, "loss": 0.2997, "num_tokens": 952350020.0, "step": 4986 }, { "epoch": 1.7021676053934118, "grad_norm": 0.2684547289428918, "learning_rate": 2.403262518968134e-05, "loss": 0.3309, "num_tokens": 952540018.0, "step": 4987 }, { "epoch": 1.7025089605734767, "grad_norm": 0.2072275834458961, "learning_rate": 2.4026302478502782e-05, "loss": 0.3071, "num_tokens": 952713469.0, "step": 4988 }, { "epoch": 1.7028503157535415, "grad_norm": 0.2197251360990376, "learning_rate": 2.401997976732423e-05, "loss": 0.2825, "num_tokens": 952892091.0, "step": 4989 }, { "epoch": 1.7031916709336063, "grad_norm": 0.21590686106951296, "learning_rate": 2.4013657056145676e-05, "loss": 0.34, "num_tokens": 953100576.0, "step": 4990 }, { "epoch": 1.7035330261136714, "grad_norm": 0.2079937190817733, "learning_rate": 2.4007334344967123e-05, "loss": 0.282, "num_tokens": 953300950.0, "step": 4991 }, { "epoch": 1.703874381293736, "grad_norm": 0.22874187160510367, "learning_rate": 2.400101163378857e-05, "loss": 0.3182, "num_tokens": 953483902.0, "step": 4992 }, { "epoch": 1.704215736473801, "grad_norm": 0.21468907443189184, "learning_rate": 2.3994688922610017e-05, "loss": 0.2911, "num_tokens": 953685688.0, "step": 4993 }, { "epoch": 1.7045570916538657, "grad_norm": 0.1796427584993532, "learning_rate": 2.398836621143146e-05, "loss": 0.3043, "num_tokens": 953916593.0, "step": 4994 }, { "epoch": 1.7048984468339308, "grad_norm": 0.20438456035806848, "learning_rate": 2.398204350025291e-05, "loss": 0.3081, "num_tokens": 954133970.0, "step": 4995 }, { "epoch": 1.7052398020139956, "grad_norm": 0.20451127983401823, "learning_rate": 2.3975720789074358e-05, "loss": 0.2795, "num_tokens": 954307081.0, "step": 4996 }, { "epoch": 1.7055811571940604, "grad_norm": 0.2241715570190021, "learning_rate": 2.3969398077895802e-05, "loss": 0.3285, "num_tokens": 954504800.0, "step": 4997 }, { "epoch": 1.7059225123741253, "grad_norm": 0.2420932048897661, "learning_rate": 2.396307536671725e-05, "loss": 0.3246, "num_tokens": 954676357.0, "step": 4998 }, { "epoch": 1.7062638675541901, "grad_norm": 0.22888111816743315, "learning_rate": 2.3956752655538696e-05, "loss": 0.2974, "num_tokens": 954830796.0, "step": 4999 }, { "epoch": 1.706605222734255, "grad_norm": 0.2020887668346331, "learning_rate": 2.3950429944360143e-05, "loss": 0.2924, "num_tokens": 955029327.0, "step": 5000 }, { "epoch": 1.7069465779143198, "grad_norm": 0.21722783067002618, "learning_rate": 2.394410723318159e-05, "loss": 0.3045, "num_tokens": 955243085.0, "step": 5001 }, { "epoch": 1.7072879330943849, "grad_norm": 0.23118100894044624, "learning_rate": 2.3937784522003037e-05, "loss": 0.2745, "num_tokens": 955376438.0, "step": 5002 }, { "epoch": 1.7076292882744495, "grad_norm": 0.22099792534967694, "learning_rate": 2.393146181082448e-05, "loss": 0.2851, "num_tokens": 955539180.0, "step": 5003 }, { "epoch": 1.7079706434545145, "grad_norm": 0.2042535377375408, "learning_rate": 2.392513909964593e-05, "loss": 0.2961, "num_tokens": 955738443.0, "step": 5004 }, { "epoch": 1.7083119986345792, "grad_norm": 0.22292582517278461, "learning_rate": 2.3918816388467378e-05, "loss": 0.3439, "num_tokens": 955946241.0, "step": 5005 }, { "epoch": 1.7086533538146442, "grad_norm": 0.2450079738899017, "learning_rate": 2.391249367728882e-05, "loss": 0.3209, "num_tokens": 956133596.0, "step": 5006 }, { "epoch": 1.7089947089947088, "grad_norm": 0.2188279447989016, "learning_rate": 2.3906170966110268e-05, "loss": 0.3077, "num_tokens": 956326403.0, "step": 5007 }, { "epoch": 1.709336064174774, "grad_norm": 0.20820120081655247, "learning_rate": 2.389984825493172e-05, "loss": 0.2714, "num_tokens": 956489844.0, "step": 5008 }, { "epoch": 1.7096774193548387, "grad_norm": 0.22829433856409187, "learning_rate": 2.3893525543753162e-05, "loss": 0.2893, "num_tokens": 956675101.0, "step": 5009 }, { "epoch": 1.7100187745349036, "grad_norm": 0.21730820520598504, "learning_rate": 2.388720283257461e-05, "loss": 0.2882, "num_tokens": 956850252.0, "step": 5010 }, { "epoch": 1.7103601297149684, "grad_norm": 0.24352633252945896, "learning_rate": 2.3880880121396056e-05, "loss": 0.2943, "num_tokens": 957028034.0, "step": 5011 }, { "epoch": 1.7107014848950333, "grad_norm": 0.22531487186749577, "learning_rate": 2.3874557410217503e-05, "loss": 0.3362, "num_tokens": 957234470.0, "step": 5012 }, { "epoch": 1.711042840075098, "grad_norm": 0.21394394204847406, "learning_rate": 2.386823469903895e-05, "loss": 0.3054, "num_tokens": 957407194.0, "step": 5013 }, { "epoch": 1.711384195255163, "grad_norm": 0.22804627243384948, "learning_rate": 2.3861911987860397e-05, "loss": 0.2722, "num_tokens": 957617388.0, "step": 5014 }, { "epoch": 1.711725550435228, "grad_norm": 0.2013648010173669, "learning_rate": 2.385558927668184e-05, "loss": 0.3102, "num_tokens": 957812670.0, "step": 5015 }, { "epoch": 1.7120669056152926, "grad_norm": 0.21331388592730433, "learning_rate": 2.3849266565503288e-05, "loss": 0.2895, "num_tokens": 957999506.0, "step": 5016 }, { "epoch": 1.7124082607953577, "grad_norm": 0.2036958310978948, "learning_rate": 2.3842943854324738e-05, "loss": 0.3047, "num_tokens": 958188556.0, "step": 5017 }, { "epoch": 1.7127496159754223, "grad_norm": 0.24683315913570855, "learning_rate": 2.3836621143146182e-05, "loss": 0.2979, "num_tokens": 958353995.0, "step": 5018 }, { "epoch": 1.7130909711554874, "grad_norm": 0.23186244963057892, "learning_rate": 2.383029843196763e-05, "loss": 0.3046, "num_tokens": 958528705.0, "step": 5019 }, { "epoch": 1.713432326335552, "grad_norm": 0.23422537476254238, "learning_rate": 2.3823975720789076e-05, "loss": 0.2904, "num_tokens": 958691197.0, "step": 5020 }, { "epoch": 1.713773681515617, "grad_norm": 0.21841167197363773, "learning_rate": 2.3817653009610523e-05, "loss": 0.2787, "num_tokens": 958872838.0, "step": 5021 }, { "epoch": 1.7141150366956819, "grad_norm": 0.21521088246825637, "learning_rate": 2.381133029843197e-05, "loss": 0.2774, "num_tokens": 959061422.0, "step": 5022 }, { "epoch": 1.7144563918757467, "grad_norm": 0.21315947645379818, "learning_rate": 2.3805007587253417e-05, "loss": 0.267, "num_tokens": 959235648.0, "step": 5023 }, { "epoch": 1.7147977470558116, "grad_norm": 0.2446200445759595, "learning_rate": 2.379868487607486e-05, "loss": 0.3183, "num_tokens": 959411961.0, "step": 5024 }, { "epoch": 1.7151391022358764, "grad_norm": 0.22263393743089976, "learning_rate": 2.379236216489631e-05, "loss": 0.3206, "num_tokens": 959596851.0, "step": 5025 }, { "epoch": 1.7154804574159412, "grad_norm": 0.23687319149375705, "learning_rate": 2.3786039453717758e-05, "loss": 0.3068, "num_tokens": 959789416.0, "step": 5026 }, { "epoch": 1.715821812596006, "grad_norm": 0.22217350921351187, "learning_rate": 2.37797167425392e-05, "loss": 0.349, "num_tokens": 959976665.0, "step": 5027 }, { "epoch": 1.7161631677760711, "grad_norm": 0.22760242955280097, "learning_rate": 2.3773394031360648e-05, "loss": 0.2977, "num_tokens": 960180092.0, "step": 5028 }, { "epoch": 1.7165045229561358, "grad_norm": 0.21688884271398676, "learning_rate": 2.3767071320182095e-05, "loss": 0.2833, "num_tokens": 960391652.0, "step": 5029 }, { "epoch": 1.7168458781362008, "grad_norm": 0.1970091106665903, "learning_rate": 2.3760748609003542e-05, "loss": 0.3143, "num_tokens": 960607138.0, "step": 5030 }, { "epoch": 1.7171872333162654, "grad_norm": 0.21429068623089972, "learning_rate": 2.375442589782499e-05, "loss": 0.3029, "num_tokens": 960784347.0, "step": 5031 }, { "epoch": 1.7175285884963305, "grad_norm": 0.22228624332767308, "learning_rate": 2.3748103186646433e-05, "loss": 0.282, "num_tokens": 960981206.0, "step": 5032 }, { "epoch": 1.7178699436763953, "grad_norm": 0.20929314214442754, "learning_rate": 2.374178047546788e-05, "loss": 0.3053, "num_tokens": 961172908.0, "step": 5033 }, { "epoch": 1.7182112988564602, "grad_norm": 0.2455690849800547, "learning_rate": 2.373545776428933e-05, "loss": 0.2909, "num_tokens": 961336706.0, "step": 5034 }, { "epoch": 1.718552654036525, "grad_norm": 0.21009996299237976, "learning_rate": 2.3729135053110774e-05, "loss": 0.2806, "num_tokens": 961538656.0, "step": 5035 }, { "epoch": 1.7188940092165899, "grad_norm": 0.20005040975296154, "learning_rate": 2.372281234193222e-05, "loss": 0.3328, "num_tokens": 961758698.0, "step": 5036 }, { "epoch": 1.7192353643966547, "grad_norm": 0.23515296530343657, "learning_rate": 2.3716489630753668e-05, "loss": 0.299, "num_tokens": 961938059.0, "step": 5037 }, { "epoch": 1.7195767195767195, "grad_norm": 0.24845915531877225, "learning_rate": 2.3710166919575115e-05, "loss": 0.29, "num_tokens": 962160615.0, "step": 5038 }, { "epoch": 1.7199180747567846, "grad_norm": 0.19384570806117107, "learning_rate": 2.370384420839656e-05, "loss": 0.293, "num_tokens": 962348449.0, "step": 5039 }, { "epoch": 1.7202594299368492, "grad_norm": 0.2071293172777201, "learning_rate": 2.369752149721801e-05, "loss": 0.3126, "num_tokens": 962546495.0, "step": 5040 }, { "epoch": 1.7206007851169143, "grad_norm": 0.21198616142881327, "learning_rate": 2.3691198786039452e-05, "loss": 0.2915, "num_tokens": 962731543.0, "step": 5041 }, { "epoch": 1.720942140296979, "grad_norm": 0.2147844404511418, "learning_rate": 2.3684876074860903e-05, "loss": 0.2838, "num_tokens": 962903491.0, "step": 5042 }, { "epoch": 1.721283495477044, "grad_norm": 0.2937871547087167, "learning_rate": 2.367855336368235e-05, "loss": 0.305, "num_tokens": 963103742.0, "step": 5043 }, { "epoch": 1.7216248506571086, "grad_norm": 0.21576927823870634, "learning_rate": 2.3672230652503793e-05, "loss": 0.3073, "num_tokens": 963318078.0, "step": 5044 }, { "epoch": 1.7219662058371736, "grad_norm": 0.20718752940949275, "learning_rate": 2.366590794132524e-05, "loss": 0.3086, "num_tokens": 963503710.0, "step": 5045 }, { "epoch": 1.7223075610172385, "grad_norm": 0.24110369948599286, "learning_rate": 2.3659585230146687e-05, "loss": 0.3328, "num_tokens": 963690291.0, "step": 5046 }, { "epoch": 1.7226489161973033, "grad_norm": 0.22293899527484493, "learning_rate": 2.3653262518968134e-05, "loss": 0.296, "num_tokens": 963850820.0, "step": 5047 }, { "epoch": 1.7229902713773682, "grad_norm": 0.2163177130729848, "learning_rate": 2.364693980778958e-05, "loss": 0.2868, "num_tokens": 964041220.0, "step": 5048 }, { "epoch": 1.723331626557433, "grad_norm": 0.20480339936303654, "learning_rate": 2.3640617096611028e-05, "loss": 0.3057, "num_tokens": 964243176.0, "step": 5049 }, { "epoch": 1.7236729817374978, "grad_norm": 0.2527150571079782, "learning_rate": 2.3634294385432475e-05, "loss": 0.316, "num_tokens": 964408696.0, "step": 5050 }, { "epoch": 1.7240143369175627, "grad_norm": 0.20315036768316283, "learning_rate": 2.3627971674253922e-05, "loss": 0.3106, "num_tokens": 964641278.0, "step": 5051 }, { "epoch": 1.7243556920976277, "grad_norm": 0.20669654421795894, "learning_rate": 2.362164896307537e-05, "loss": 0.2955, "num_tokens": 964808718.0, "step": 5052 }, { "epoch": 1.7246970472776924, "grad_norm": 0.22419530107653834, "learning_rate": 2.3615326251896813e-05, "loss": 0.2805, "num_tokens": 964987954.0, "step": 5053 }, { "epoch": 1.7250384024577574, "grad_norm": 0.2077048993560505, "learning_rate": 2.360900354071826e-05, "loss": 0.2982, "num_tokens": 965152725.0, "step": 5054 }, { "epoch": 1.725379757637822, "grad_norm": 0.21861919644101338, "learning_rate": 2.360268082953971e-05, "loss": 0.3245, "num_tokens": 965367016.0, "step": 5055 }, { "epoch": 1.725721112817887, "grad_norm": 0.20593147061972517, "learning_rate": 2.3596358118361154e-05, "loss": 0.3156, "num_tokens": 965586804.0, "step": 5056 }, { "epoch": 1.7260624679979517, "grad_norm": 0.2063755776971245, "learning_rate": 2.35900354071826e-05, "loss": 0.3097, "num_tokens": 965783788.0, "step": 5057 }, { "epoch": 1.7264038231780168, "grad_norm": 0.20424925099863236, "learning_rate": 2.3583712696004047e-05, "loss": 0.3067, "num_tokens": 965969008.0, "step": 5058 }, { "epoch": 1.7267451783580816, "grad_norm": 0.21721588869019473, "learning_rate": 2.3577389984825494e-05, "loss": 0.3155, "num_tokens": 966205569.0, "step": 5059 }, { "epoch": 1.7270865335381465, "grad_norm": 0.20832754072264859, "learning_rate": 2.357106727364694e-05, "loss": 0.2672, "num_tokens": 966381198.0, "step": 5060 }, { "epoch": 1.7274278887182113, "grad_norm": 0.2215164358059331, "learning_rate": 2.356474456246839e-05, "loss": 0.3259, "num_tokens": 966561263.0, "step": 5061 }, { "epoch": 1.7277692438982761, "grad_norm": 0.21228996739824807, "learning_rate": 2.3558421851289832e-05, "loss": 0.2826, "num_tokens": 966730071.0, "step": 5062 }, { "epoch": 1.728110599078341, "grad_norm": 0.2364664143915845, "learning_rate": 2.355209914011128e-05, "loss": 0.2893, "num_tokens": 966927558.0, "step": 5063 }, { "epoch": 1.7284519542584058, "grad_norm": 0.20635574072501414, "learning_rate": 2.354577642893273e-05, "loss": 0.3004, "num_tokens": 967100876.0, "step": 5064 }, { "epoch": 1.7287933094384709, "grad_norm": 0.23278982183814229, "learning_rate": 2.3539453717754173e-05, "loss": 0.2967, "num_tokens": 967300017.0, "step": 5065 }, { "epoch": 1.7291346646185355, "grad_norm": 0.20976349325892013, "learning_rate": 2.353313100657562e-05, "loss": 0.2859, "num_tokens": 967469458.0, "step": 5066 }, { "epoch": 1.7294760197986006, "grad_norm": 0.1957238699269953, "learning_rate": 2.3526808295397067e-05, "loss": 0.2799, "num_tokens": 967644303.0, "step": 5067 }, { "epoch": 1.7298173749786652, "grad_norm": 0.23039824238478826, "learning_rate": 2.3520485584218514e-05, "loss": 0.2744, "num_tokens": 967814724.0, "step": 5068 }, { "epoch": 1.7301587301587302, "grad_norm": 0.23010184082321491, "learning_rate": 2.351416287303996e-05, "loss": 0.3032, "num_tokens": 968000222.0, "step": 5069 }, { "epoch": 1.730500085338795, "grad_norm": 0.2093563643838442, "learning_rate": 2.3507840161861408e-05, "loss": 0.2919, "num_tokens": 968196244.0, "step": 5070 }, { "epoch": 1.73084144051886, "grad_norm": 0.17561111562150886, "learning_rate": 2.350151745068285e-05, "loss": 0.2792, "num_tokens": 968374826.0, "step": 5071 }, { "epoch": 1.7311827956989247, "grad_norm": 0.21437053461795746, "learning_rate": 2.3495194739504302e-05, "loss": 0.2986, "num_tokens": 968564697.0, "step": 5072 }, { "epoch": 1.7315241508789896, "grad_norm": 0.22573554124159956, "learning_rate": 2.348887202832575e-05, "loss": 0.2922, "num_tokens": 968728252.0, "step": 5073 }, { "epoch": 1.7318655060590544, "grad_norm": 0.2112245829374296, "learning_rate": 2.3482549317147192e-05, "loss": 0.303, "num_tokens": 968943123.0, "step": 5074 }, { "epoch": 1.7322068612391193, "grad_norm": 0.2013482423478449, "learning_rate": 2.347622660596864e-05, "loss": 0.2898, "num_tokens": 969127435.0, "step": 5075 }, { "epoch": 1.732548216419184, "grad_norm": 0.228865854018668, "learning_rate": 2.3469903894790086e-05, "loss": 0.3144, "num_tokens": 969308328.0, "step": 5076 }, { "epoch": 1.732889571599249, "grad_norm": 0.19617406366156356, "learning_rate": 2.3463581183611533e-05, "loss": 0.2823, "num_tokens": 969521739.0, "step": 5077 }, { "epoch": 1.733230926779314, "grad_norm": 0.19914787713667412, "learning_rate": 2.345725847243298e-05, "loss": 0.3231, "num_tokens": 969689307.0, "step": 5078 }, { "epoch": 1.7335722819593786, "grad_norm": 0.265008835655771, "learning_rate": 2.3450935761254427e-05, "loss": 0.3129, "num_tokens": 969911944.0, "step": 5079 }, { "epoch": 1.7339136371394437, "grad_norm": 0.21026414874589933, "learning_rate": 2.344461305007587e-05, "loss": 0.3113, "num_tokens": 970115288.0, "step": 5080 }, { "epoch": 1.7342549923195083, "grad_norm": 0.21441402357762343, "learning_rate": 2.343829033889732e-05, "loss": 0.3035, "num_tokens": 970277300.0, "step": 5081 }, { "epoch": 1.7345963474995734, "grad_norm": 0.21562249798429348, "learning_rate": 2.3431967627718768e-05, "loss": 0.3075, "num_tokens": 970481388.0, "step": 5082 }, { "epoch": 1.7349377026796382, "grad_norm": 0.1981861699454376, "learning_rate": 2.3425644916540212e-05, "loss": 0.3476, "num_tokens": 970696531.0, "step": 5083 }, { "epoch": 1.735279057859703, "grad_norm": 0.23396327557374555, "learning_rate": 2.341932220536166e-05, "loss": 0.3536, "num_tokens": 970918645.0, "step": 5084 }, { "epoch": 1.7356204130397679, "grad_norm": 0.18198636574853783, "learning_rate": 2.341299949418311e-05, "loss": 0.2941, "num_tokens": 971123787.0, "step": 5085 }, { "epoch": 1.7359617682198327, "grad_norm": 0.2332277331386063, "learning_rate": 2.3406676783004553e-05, "loss": 0.303, "num_tokens": 971274224.0, "step": 5086 }, { "epoch": 1.7363031233998976, "grad_norm": 0.21080350321278304, "learning_rate": 2.3400354071826e-05, "loss": 0.3198, "num_tokens": 971465901.0, "step": 5087 }, { "epoch": 1.7366444785799624, "grad_norm": 0.24393332167887966, "learning_rate": 2.3394031360647447e-05, "loss": 0.3508, "num_tokens": 971655108.0, "step": 5088 }, { "epoch": 1.7369858337600275, "grad_norm": 0.22362211584158856, "learning_rate": 2.3387708649468894e-05, "loss": 0.2735, "num_tokens": 971842690.0, "step": 5089 }, { "epoch": 1.737327188940092, "grad_norm": 0.21603535645050587, "learning_rate": 2.338138593829034e-05, "loss": 0.2952, "num_tokens": 972042139.0, "step": 5090 }, { "epoch": 1.7376685441201571, "grad_norm": 0.19167514807430017, "learning_rate": 2.3375063227111788e-05, "loss": 0.3362, "num_tokens": 972226522.0, "step": 5091 }, { "epoch": 1.7380098993002218, "grad_norm": 0.26205709651878345, "learning_rate": 2.336874051593323e-05, "loss": 0.3048, "num_tokens": 972400079.0, "step": 5092 }, { "epoch": 1.7383512544802868, "grad_norm": 0.20638553339110832, "learning_rate": 2.336241780475468e-05, "loss": 0.2864, "num_tokens": 972609327.0, "step": 5093 }, { "epoch": 1.7386926096603514, "grad_norm": 0.20438253252208377, "learning_rate": 2.335609509357613e-05, "loss": 0.2831, "num_tokens": 972797037.0, "step": 5094 }, { "epoch": 1.7390339648404165, "grad_norm": 0.21442165630027787, "learning_rate": 2.3349772382397572e-05, "loss": 0.3306, "num_tokens": 972954087.0, "step": 5095 }, { "epoch": 1.7393753200204813, "grad_norm": 0.2402326920797823, "learning_rate": 2.334344967121902e-05, "loss": 0.3011, "num_tokens": 973134391.0, "step": 5096 }, { "epoch": 1.7397166752005462, "grad_norm": 0.2276556346019307, "learning_rate": 2.3337126960040466e-05, "loss": 0.3265, "num_tokens": 973328216.0, "step": 5097 }, { "epoch": 1.740058030380611, "grad_norm": 0.22079884287078141, "learning_rate": 2.3330804248861913e-05, "loss": 0.3113, "num_tokens": 973528260.0, "step": 5098 }, { "epoch": 1.7403993855606759, "grad_norm": 0.19715253340755595, "learning_rate": 2.332448153768336e-05, "loss": 0.3322, "num_tokens": 973725435.0, "step": 5099 }, { "epoch": 1.7407407407407407, "grad_norm": 0.44047732393201594, "learning_rate": 2.3318158826504807e-05, "loss": 0.324, "num_tokens": 973888180.0, "step": 5100 }, { "epoch": 1.7410820959208055, "grad_norm": 0.23216895013848343, "learning_rate": 2.331183611532625e-05, "loss": 0.2823, "num_tokens": 974057000.0, "step": 5101 }, { "epoch": 1.7414234511008706, "grad_norm": 0.2244346076660613, "learning_rate": 2.33055134041477e-05, "loss": 0.3079, "num_tokens": 974221737.0, "step": 5102 }, { "epoch": 1.7417648062809352, "grad_norm": 0.24113532367205417, "learning_rate": 2.3299190692969148e-05, "loss": 0.2968, "num_tokens": 974389794.0, "step": 5103 }, { "epoch": 1.7421061614610003, "grad_norm": 0.21258579173832068, "learning_rate": 2.3292867981790592e-05, "loss": 0.3037, "num_tokens": 974576591.0, "step": 5104 }, { "epoch": 1.742447516641065, "grad_norm": 0.21987221259978895, "learning_rate": 2.328654527061204e-05, "loss": 0.2867, "num_tokens": 974748801.0, "step": 5105 }, { "epoch": 1.74278887182113, "grad_norm": 0.18192785379934553, "learning_rate": 2.3280222559433486e-05, "loss": 0.3109, "num_tokens": 974970233.0, "step": 5106 }, { "epoch": 1.7431302270011948, "grad_norm": 0.20207956637484947, "learning_rate": 2.3273899848254933e-05, "loss": 0.2744, "num_tokens": 975130858.0, "step": 5107 }, { "epoch": 1.7434715821812596, "grad_norm": 0.22870463278920722, "learning_rate": 2.326757713707638e-05, "loss": 0.2888, "num_tokens": 975337137.0, "step": 5108 }, { "epoch": 1.7438129373613245, "grad_norm": 0.19990660963292503, "learning_rate": 2.3261254425897827e-05, "loss": 0.3288, "num_tokens": 975550744.0, "step": 5109 }, { "epoch": 1.7441542925413893, "grad_norm": 0.22144795218517785, "learning_rate": 2.325493171471927e-05, "loss": 0.3157, "num_tokens": 975773659.0, "step": 5110 }, { "epoch": 1.7444956477214542, "grad_norm": 0.19327297119802078, "learning_rate": 2.324860900354072e-05, "loss": 0.3129, "num_tokens": 975978389.0, "step": 5111 }, { "epoch": 1.744837002901519, "grad_norm": 0.19198166392665988, "learning_rate": 2.3242286292362168e-05, "loss": 0.3016, "num_tokens": 976166199.0, "step": 5112 }, { "epoch": 1.7451783580815838, "grad_norm": 0.21693532782390226, "learning_rate": 2.323596358118361e-05, "loss": 0.3175, "num_tokens": 976368575.0, "step": 5113 }, { "epoch": 1.7455197132616487, "grad_norm": 0.22489300503962456, "learning_rate": 2.3229640870005058e-05, "loss": 0.3187, "num_tokens": 976578213.0, "step": 5114 }, { "epoch": 1.7458610684417137, "grad_norm": 0.22587991555836137, "learning_rate": 2.322331815882651e-05, "loss": 0.2959, "num_tokens": 976744860.0, "step": 5115 }, { "epoch": 1.7462024236217784, "grad_norm": 0.2157158031139283, "learning_rate": 2.3216995447647952e-05, "loss": 0.2864, "num_tokens": 976964742.0, "step": 5116 }, { "epoch": 1.7465437788018434, "grad_norm": 0.19891784417971128, "learning_rate": 2.32106727364694e-05, "loss": 0.3274, "num_tokens": 977166636.0, "step": 5117 }, { "epoch": 1.746885133981908, "grad_norm": 0.2389539406955519, "learning_rate": 2.3204350025290846e-05, "loss": 0.3169, "num_tokens": 977349386.0, "step": 5118 }, { "epoch": 1.747226489161973, "grad_norm": 0.21816203652184127, "learning_rate": 2.3198027314112293e-05, "loss": 0.317, "num_tokens": 977585258.0, "step": 5119 }, { "epoch": 1.747567844342038, "grad_norm": 0.18084045702913396, "learning_rate": 2.319170460293374e-05, "loss": 0.2983, "num_tokens": 977789481.0, "step": 5120 }, { "epoch": 1.7479091995221028, "grad_norm": 0.2063913992821946, "learning_rate": 2.3185381891755187e-05, "loss": 0.2767, "num_tokens": 977993001.0, "step": 5121 }, { "epoch": 1.7482505547021676, "grad_norm": 0.2010303241310977, "learning_rate": 2.317905918057663e-05, "loss": 0.296, "num_tokens": 978179664.0, "step": 5122 }, { "epoch": 1.7485919098822325, "grad_norm": 0.21391921183495743, "learning_rate": 2.3172736469398078e-05, "loss": 0.3361, "num_tokens": 978420591.0, "step": 5123 }, { "epoch": 1.7489332650622973, "grad_norm": 0.20066222468532335, "learning_rate": 2.3166413758219528e-05, "loss": 0.2994, "num_tokens": 978593003.0, "step": 5124 }, { "epoch": 1.7492746202423621, "grad_norm": 0.23238997360714186, "learning_rate": 2.316009104704097e-05, "loss": 0.2899, "num_tokens": 978754864.0, "step": 5125 }, { "epoch": 1.7496159754224272, "grad_norm": 0.22977460845148415, "learning_rate": 2.315376833586242e-05, "loss": 0.2705, "num_tokens": 978925719.0, "step": 5126 }, { "epoch": 1.7499573306024918, "grad_norm": 0.1930025285232138, "learning_rate": 2.3147445624683866e-05, "loss": 0.3127, "num_tokens": 979131075.0, "step": 5127 }, { "epoch": 1.7502986857825569, "grad_norm": 0.22330327088843618, "learning_rate": 2.3141122913505313e-05, "loss": 0.299, "num_tokens": 979349616.0, "step": 5128 }, { "epoch": 1.7506400409626215, "grad_norm": 0.21665255177956022, "learning_rate": 2.313480020232676e-05, "loss": 0.3154, "num_tokens": 979515566.0, "step": 5129 }, { "epoch": 1.7509813961426866, "grad_norm": 0.21471360040138673, "learning_rate": 2.3128477491148207e-05, "loss": 0.311, "num_tokens": 979743125.0, "step": 5130 }, { "epoch": 1.7513227513227512, "grad_norm": 0.18360920706030473, "learning_rate": 2.312215477996965e-05, "loss": 0.3232, "num_tokens": 979942329.0, "step": 5131 }, { "epoch": 1.7516641065028162, "grad_norm": 0.24406705357664144, "learning_rate": 2.31158320687911e-05, "loss": 0.3093, "num_tokens": 980130359.0, "step": 5132 }, { "epoch": 1.752005461682881, "grad_norm": 0.20736472755874968, "learning_rate": 2.3109509357612547e-05, "loss": 0.2776, "num_tokens": 980315547.0, "step": 5133 }, { "epoch": 1.752346816862946, "grad_norm": 0.21308385527278725, "learning_rate": 2.310318664643399e-05, "loss": 0.3146, "num_tokens": 980508146.0, "step": 5134 }, { "epoch": 1.7526881720430108, "grad_norm": 0.20144268452923403, "learning_rate": 2.3096863935255438e-05, "loss": 0.2912, "num_tokens": 980733393.0, "step": 5135 }, { "epoch": 1.7530295272230756, "grad_norm": 0.1842970772604969, "learning_rate": 2.3090541224076885e-05, "loss": 0.2826, "num_tokens": 980940506.0, "step": 5136 }, { "epoch": 1.7533708824031404, "grad_norm": 0.21807610443551106, "learning_rate": 2.3084218512898332e-05, "loss": 0.3156, "num_tokens": 981133543.0, "step": 5137 }, { "epoch": 1.7537122375832053, "grad_norm": 0.21824636145243756, "learning_rate": 2.307789580171978e-05, "loss": 0.3183, "num_tokens": 981349836.0, "step": 5138 }, { "epoch": 1.7540535927632703, "grad_norm": 0.22394072524579514, "learning_rate": 2.3071573090541226e-05, "loss": 0.3319, "num_tokens": 981578187.0, "step": 5139 }, { "epoch": 1.754394947943335, "grad_norm": 0.22002475755515388, "learning_rate": 2.306525037936267e-05, "loss": 0.303, "num_tokens": 981826140.0, "step": 5140 }, { "epoch": 1.7547363031234, "grad_norm": 0.1974968156285382, "learning_rate": 2.305892766818412e-05, "loss": 0.3132, "num_tokens": 982013728.0, "step": 5141 }, { "epoch": 1.7550776583034646, "grad_norm": 0.20810489303674276, "learning_rate": 2.3052604957005567e-05, "loss": 0.3076, "num_tokens": 982214461.0, "step": 5142 }, { "epoch": 1.7554190134835297, "grad_norm": 0.22967927070060895, "learning_rate": 2.304628224582701e-05, "loss": 0.2874, "num_tokens": 982395348.0, "step": 5143 }, { "epoch": 1.7557603686635943, "grad_norm": 0.2213680089845639, "learning_rate": 2.3039959534648458e-05, "loss": 0.323, "num_tokens": 982588295.0, "step": 5144 }, { "epoch": 1.7561017238436594, "grad_norm": 0.1991916978841199, "learning_rate": 2.3033636823469908e-05, "loss": 0.2908, "num_tokens": 982744092.0, "step": 5145 }, { "epoch": 1.7564430790237242, "grad_norm": 0.2386824567377208, "learning_rate": 2.302731411229135e-05, "loss": 0.3048, "num_tokens": 982933266.0, "step": 5146 }, { "epoch": 1.756784434203789, "grad_norm": 0.23489261813327064, "learning_rate": 2.30209914011128e-05, "loss": 0.32, "num_tokens": 983127560.0, "step": 5147 }, { "epoch": 1.7571257893838539, "grad_norm": 0.22263966926959788, "learning_rate": 2.3014668689934245e-05, "loss": 0.2992, "num_tokens": 983301136.0, "step": 5148 }, { "epoch": 1.7574671445639187, "grad_norm": 0.24414006023346224, "learning_rate": 2.3008345978755692e-05, "loss": 0.317, "num_tokens": 983496356.0, "step": 5149 }, { "epoch": 1.7578084997439836, "grad_norm": 0.19907541432437456, "learning_rate": 2.300202326757714e-05, "loss": 0.3202, "num_tokens": 983727545.0, "step": 5150 }, { "epoch": 1.7581498549240484, "grad_norm": 0.2224753677556103, "learning_rate": 2.2995700556398586e-05, "loss": 0.3088, "num_tokens": 983906192.0, "step": 5151 }, { "epoch": 1.7584912101041135, "grad_norm": 0.22518214061148592, "learning_rate": 2.298937784522003e-05, "loss": 0.3085, "num_tokens": 984061914.0, "step": 5152 }, { "epoch": 1.758832565284178, "grad_norm": 0.2566920000183094, "learning_rate": 2.2983055134041477e-05, "loss": 0.273, "num_tokens": 984228012.0, "step": 5153 }, { "epoch": 1.7591739204642431, "grad_norm": 0.20273945207118382, "learning_rate": 2.2976732422862927e-05, "loss": 0.3111, "num_tokens": 984404304.0, "step": 5154 }, { "epoch": 1.7595152756443078, "grad_norm": 0.22307052200799374, "learning_rate": 2.297040971168437e-05, "loss": 0.3131, "num_tokens": 984600925.0, "step": 5155 }, { "epoch": 1.7598566308243728, "grad_norm": 0.23112231591371182, "learning_rate": 2.2964087000505818e-05, "loss": 0.3573, "num_tokens": 984817903.0, "step": 5156 }, { "epoch": 1.7601979860044377, "grad_norm": 0.21259328279776085, "learning_rate": 2.2957764289327265e-05, "loss": 0.3018, "num_tokens": 985010119.0, "step": 5157 }, { "epoch": 1.7605393411845025, "grad_norm": 0.20588418318724472, "learning_rate": 2.2951441578148712e-05, "loss": 0.3018, "num_tokens": 985193703.0, "step": 5158 }, { "epoch": 1.7608806963645673, "grad_norm": 0.23188053620429885, "learning_rate": 2.294511886697016e-05, "loss": 0.3078, "num_tokens": 985367644.0, "step": 5159 }, { "epoch": 1.7612220515446322, "grad_norm": 0.18816715647859458, "learning_rate": 2.2938796155791606e-05, "loss": 0.3301, "num_tokens": 985551430.0, "step": 5160 }, { "epoch": 1.761563406724697, "grad_norm": 0.2696410534269297, "learning_rate": 2.293247344461305e-05, "loss": 0.2852, "num_tokens": 985715437.0, "step": 5161 }, { "epoch": 1.7619047619047619, "grad_norm": 0.23171697857257403, "learning_rate": 2.29261507334345e-05, "loss": 0.2885, "num_tokens": 985858992.0, "step": 5162 }, { "epoch": 1.762246117084827, "grad_norm": 0.22124473514209225, "learning_rate": 2.2919828022255947e-05, "loss": 0.34, "num_tokens": 986012918.0, "step": 5163 }, { "epoch": 1.7625874722648915, "grad_norm": 0.24807990113862471, "learning_rate": 2.291350531107739e-05, "loss": 0.3222, "num_tokens": 986187351.0, "step": 5164 }, { "epoch": 1.7629288274449566, "grad_norm": 0.2201541467978732, "learning_rate": 2.2907182599898837e-05, "loss": 0.3166, "num_tokens": 986427299.0, "step": 5165 }, { "epoch": 1.7632701826250212, "grad_norm": 0.2474600994428585, "learning_rate": 2.2900859888720284e-05, "loss": 0.3013, "num_tokens": 986574014.0, "step": 5166 }, { "epoch": 1.7636115378050863, "grad_norm": 0.22241080443411312, "learning_rate": 2.289453717754173e-05, "loss": 0.2891, "num_tokens": 986752115.0, "step": 5167 }, { "epoch": 1.763952892985151, "grad_norm": 0.2266839503941028, "learning_rate": 2.288821446636318e-05, "loss": 0.3124, "num_tokens": 986951262.0, "step": 5168 }, { "epoch": 1.764294248165216, "grad_norm": 0.2298585560551426, "learning_rate": 2.2881891755184622e-05, "loss": 0.2941, "num_tokens": 987130908.0, "step": 5169 }, { "epoch": 1.7646356033452808, "grad_norm": 0.2137201770124585, "learning_rate": 2.287556904400607e-05, "loss": 0.3018, "num_tokens": 987302834.0, "step": 5170 }, { "epoch": 1.7649769585253456, "grad_norm": 0.23007714306578034, "learning_rate": 2.286924633282752e-05, "loss": 0.2807, "num_tokens": 987533038.0, "step": 5171 }, { "epoch": 1.7653183137054105, "grad_norm": 0.21793908904437753, "learning_rate": 2.2862923621648963e-05, "loss": 0.286, "num_tokens": 987715177.0, "step": 5172 }, { "epoch": 1.7656596688854753, "grad_norm": 0.2436227773979359, "learning_rate": 2.285660091047041e-05, "loss": 0.3314, "num_tokens": 987882484.0, "step": 5173 }, { "epoch": 1.7660010240655402, "grad_norm": 0.22878486783788504, "learning_rate": 2.2850278199291857e-05, "loss": 0.2922, "num_tokens": 988063658.0, "step": 5174 }, { "epoch": 1.766342379245605, "grad_norm": 0.18889258537439862, "learning_rate": 2.2843955488113304e-05, "loss": 0.3569, "num_tokens": 988296699.0, "step": 5175 }, { "epoch": 1.76668373442567, "grad_norm": 0.21798489173151253, "learning_rate": 2.283763277693475e-05, "loss": 0.3049, "num_tokens": 988482142.0, "step": 5176 }, { "epoch": 1.7670250896057347, "grad_norm": 0.21341733795654588, "learning_rate": 2.2831310065756198e-05, "loss": 0.313, "num_tokens": 988700451.0, "step": 5177 }, { "epoch": 1.7673664447857997, "grad_norm": 0.21448943192714504, "learning_rate": 2.282498735457764e-05, "loss": 0.2716, "num_tokens": 988868712.0, "step": 5178 }, { "epoch": 1.7677077999658644, "grad_norm": 0.19774515927094538, "learning_rate": 2.2818664643399092e-05, "loss": 0.3087, "num_tokens": 989126336.0, "step": 5179 }, { "epoch": 1.7680491551459294, "grad_norm": 0.20571546753001851, "learning_rate": 2.281234193222054e-05, "loss": 0.317, "num_tokens": 989331805.0, "step": 5180 }, { "epoch": 1.768390510325994, "grad_norm": 0.2047856046888824, "learning_rate": 2.2806019221041982e-05, "loss": 0.3275, "num_tokens": 989561868.0, "step": 5181 }, { "epoch": 1.768731865506059, "grad_norm": 0.2020510453489954, "learning_rate": 2.279969650986343e-05, "loss": 0.3012, "num_tokens": 989765091.0, "step": 5182 }, { "epoch": 1.769073220686124, "grad_norm": 0.2079315482368989, "learning_rate": 2.2793373798684876e-05, "loss": 0.3167, "num_tokens": 989970247.0, "step": 5183 }, { "epoch": 1.7694145758661888, "grad_norm": 0.22462484752799566, "learning_rate": 2.2787051087506323e-05, "loss": 0.2956, "num_tokens": 990152137.0, "step": 5184 }, { "epoch": 1.7697559310462536, "grad_norm": 0.1843476992103604, "learning_rate": 2.278072837632777e-05, "loss": 0.3137, "num_tokens": 990346102.0, "step": 5185 }, { "epoch": 1.7700972862263185, "grad_norm": 0.22624954242854686, "learning_rate": 2.2774405665149217e-05, "loss": 0.331, "num_tokens": 990566977.0, "step": 5186 }, { "epoch": 1.7704386414063833, "grad_norm": 0.2043579378365924, "learning_rate": 2.276808295397066e-05, "loss": 0.305, "num_tokens": 990768709.0, "step": 5187 }, { "epoch": 1.7707799965864481, "grad_norm": 0.19746431291540562, "learning_rate": 2.276176024279211e-05, "loss": 0.2948, "num_tokens": 990938377.0, "step": 5188 }, { "epoch": 1.7711213517665132, "grad_norm": 0.2236540356074635, "learning_rate": 2.2755437531613558e-05, "loss": 0.3004, "num_tokens": 991162428.0, "step": 5189 }, { "epoch": 1.7714627069465778, "grad_norm": 0.21501757935840804, "learning_rate": 2.2749114820435002e-05, "loss": 0.288, "num_tokens": 991313528.0, "step": 5190 }, { "epoch": 1.7718040621266429, "grad_norm": 0.22049683681132598, "learning_rate": 2.274279210925645e-05, "loss": 0.3171, "num_tokens": 991497435.0, "step": 5191 }, { "epoch": 1.7721454173067075, "grad_norm": 0.23767917222708324, "learning_rate": 2.27364693980779e-05, "loss": 0.2901, "num_tokens": 991664700.0, "step": 5192 }, { "epoch": 1.7724867724867726, "grad_norm": 0.216413895540331, "learning_rate": 2.2730146686899343e-05, "loss": 0.277, "num_tokens": 991837610.0, "step": 5193 }, { "epoch": 1.7728281276668374, "grad_norm": 0.21812152142608918, "learning_rate": 2.272382397572079e-05, "loss": 0.2792, "num_tokens": 992013047.0, "step": 5194 }, { "epoch": 1.7731694828469022, "grad_norm": 0.20773921896462644, "learning_rate": 2.2717501264542237e-05, "loss": 0.2791, "num_tokens": 992173908.0, "step": 5195 }, { "epoch": 1.773510838026967, "grad_norm": 0.21737119465771895, "learning_rate": 2.2711178553363684e-05, "loss": 0.3132, "num_tokens": 992365566.0, "step": 5196 }, { "epoch": 1.773852193207032, "grad_norm": 0.2144439508181481, "learning_rate": 2.270485584218513e-05, "loss": 0.292, "num_tokens": 992566675.0, "step": 5197 }, { "epoch": 1.7741935483870968, "grad_norm": 0.19421471072749869, "learning_rate": 2.2698533131006578e-05, "loss": 0.2861, "num_tokens": 992769793.0, "step": 5198 }, { "epoch": 1.7745349035671616, "grad_norm": 0.19849507003893085, "learning_rate": 2.269221041982802e-05, "loss": 0.3215, "num_tokens": 992974340.0, "step": 5199 }, { "epoch": 1.7748762587472267, "grad_norm": 0.23174565091300553, "learning_rate": 2.2685887708649468e-05, "loss": 0.3492, "num_tokens": 993183622.0, "step": 5200 }, { "epoch": 1.7752176139272913, "grad_norm": 0.22094252248400212, "learning_rate": 2.267956499747092e-05, "loss": 0.3248, "num_tokens": 993363524.0, "step": 5201 }, { "epoch": 1.7755589691073563, "grad_norm": 0.21599142732413726, "learning_rate": 2.2673242286292362e-05, "loss": 0.2883, "num_tokens": 993523349.0, "step": 5202 }, { "epoch": 1.775900324287421, "grad_norm": 0.2245658812146261, "learning_rate": 2.266691957511381e-05, "loss": 0.3142, "num_tokens": 993694172.0, "step": 5203 }, { "epoch": 1.776241679467486, "grad_norm": 0.21427515508715844, "learning_rate": 2.2660596863935256e-05, "loss": 0.2796, "num_tokens": 993867899.0, "step": 5204 }, { "epoch": 1.7765830346475506, "grad_norm": 0.22572415598699128, "learning_rate": 2.2654274152756703e-05, "loss": 0.2746, "num_tokens": 994055906.0, "step": 5205 }, { "epoch": 1.7769243898276157, "grad_norm": 0.23214009735640126, "learning_rate": 2.264795144157815e-05, "loss": 0.3412, "num_tokens": 994263876.0, "step": 5206 }, { "epoch": 1.7772657450076805, "grad_norm": 0.1767515187860919, "learning_rate": 2.2641628730399597e-05, "loss": 0.2856, "num_tokens": 994445615.0, "step": 5207 }, { "epoch": 1.7776071001877454, "grad_norm": 0.23478092406160353, "learning_rate": 2.263530601922104e-05, "loss": 0.2882, "num_tokens": 994579855.0, "step": 5208 }, { "epoch": 1.7779484553678102, "grad_norm": 0.23381580733057084, "learning_rate": 2.262898330804249e-05, "loss": 0.3161, "num_tokens": 994782833.0, "step": 5209 }, { "epoch": 1.778289810547875, "grad_norm": 0.20866425630999796, "learning_rate": 2.2622660596863938e-05, "loss": 0.2911, "num_tokens": 994988559.0, "step": 5210 }, { "epoch": 1.77863116572794, "grad_norm": 0.22684093774548822, "learning_rate": 2.261633788568538e-05, "loss": 0.2853, "num_tokens": 995166898.0, "step": 5211 }, { "epoch": 1.7789725209080047, "grad_norm": 0.1998461902409049, "learning_rate": 2.261001517450683e-05, "loss": 0.284, "num_tokens": 995334215.0, "step": 5212 }, { "epoch": 1.7793138760880698, "grad_norm": 0.20528761979644608, "learning_rate": 2.2603692463328276e-05, "loss": 0.305, "num_tokens": 995516078.0, "step": 5213 }, { "epoch": 1.7796552312681344, "grad_norm": 0.23639778032677186, "learning_rate": 2.2597369752149723e-05, "loss": 0.2901, "num_tokens": 995666066.0, "step": 5214 }, { "epoch": 1.7799965864481995, "grad_norm": 0.26092389937545035, "learning_rate": 2.259104704097117e-05, "loss": 0.3003, "num_tokens": 995853345.0, "step": 5215 }, { "epoch": 1.780337941628264, "grad_norm": 0.2141017224588889, "learning_rate": 2.2584724329792617e-05, "loss": 0.3069, "num_tokens": 996028307.0, "step": 5216 }, { "epoch": 1.7806792968083291, "grad_norm": 0.22394494476408777, "learning_rate": 2.257840161861406e-05, "loss": 0.3198, "num_tokens": 996196343.0, "step": 5217 }, { "epoch": 1.7810206519883938, "grad_norm": 0.36375027502169993, "learning_rate": 2.257207890743551e-05, "loss": 0.2731, "num_tokens": 996353728.0, "step": 5218 }, { "epoch": 1.7813620071684588, "grad_norm": 0.1941009673139872, "learning_rate": 2.2565756196256958e-05, "loss": 0.3015, "num_tokens": 996536528.0, "step": 5219 }, { "epoch": 1.7817033623485237, "grad_norm": 0.243370958148136, "learning_rate": 2.25594334850784e-05, "loss": 0.3439, "num_tokens": 996761970.0, "step": 5220 }, { "epoch": 1.7820447175285885, "grad_norm": 0.21908977669951138, "learning_rate": 2.2553110773899848e-05, "loss": 0.2916, "num_tokens": 996921280.0, "step": 5221 }, { "epoch": 1.7823860727086533, "grad_norm": 0.22887270834744908, "learning_rate": 2.25467880627213e-05, "loss": 0.3163, "num_tokens": 997115174.0, "step": 5222 }, { "epoch": 1.7827274278887182, "grad_norm": 0.20489020796441132, "learning_rate": 2.2540465351542742e-05, "loss": 0.2864, "num_tokens": 997290431.0, "step": 5223 }, { "epoch": 1.783068783068783, "grad_norm": 0.23331919319382013, "learning_rate": 2.253414264036419e-05, "loss": 0.3188, "num_tokens": 997478073.0, "step": 5224 }, { "epoch": 1.7834101382488479, "grad_norm": 0.2618299597239065, "learning_rate": 2.2527819929185636e-05, "loss": 0.3176, "num_tokens": 997634750.0, "step": 5225 }, { "epoch": 1.783751493428913, "grad_norm": 0.19463065722504966, "learning_rate": 2.2521497218007083e-05, "loss": 0.2931, "num_tokens": 997828796.0, "step": 5226 }, { "epoch": 1.7840928486089775, "grad_norm": 0.21139174427139112, "learning_rate": 2.251517450682853e-05, "loss": 0.3014, "num_tokens": 998040309.0, "step": 5227 }, { "epoch": 1.7844342037890426, "grad_norm": 0.21987184660206516, "learning_rate": 2.2508851795649977e-05, "loss": 0.2946, "num_tokens": 998228066.0, "step": 5228 }, { "epoch": 1.7847755589691072, "grad_norm": 0.1954629491294883, "learning_rate": 2.250252908447142e-05, "loss": 0.2751, "num_tokens": 998386886.0, "step": 5229 }, { "epoch": 1.7851169141491723, "grad_norm": 0.222771813663693, "learning_rate": 2.2496206373292868e-05, "loss": 0.2952, "num_tokens": 998580764.0, "step": 5230 }, { "epoch": 1.7854582693292371, "grad_norm": 0.22993521334343098, "learning_rate": 2.2489883662114318e-05, "loss": 0.3128, "num_tokens": 998768997.0, "step": 5231 }, { "epoch": 1.785799624509302, "grad_norm": 0.18906998353320112, "learning_rate": 2.248356095093576e-05, "loss": 0.2977, "num_tokens": 998954388.0, "step": 5232 }, { "epoch": 1.7861409796893668, "grad_norm": 0.235515637756438, "learning_rate": 2.247723823975721e-05, "loss": 0.2854, "num_tokens": 999130977.0, "step": 5233 }, { "epoch": 1.7864823348694316, "grad_norm": 0.2061504330121277, "learning_rate": 2.2470915528578656e-05, "loss": 0.3045, "num_tokens": 999304440.0, "step": 5234 }, { "epoch": 1.7868236900494965, "grad_norm": 0.22334889278996434, "learning_rate": 2.2464592817400102e-05, "loss": 0.294, "num_tokens": 999485536.0, "step": 5235 }, { "epoch": 1.7871650452295613, "grad_norm": 0.21839499730772055, "learning_rate": 2.245827010622155e-05, "loss": 0.3131, "num_tokens": 999688192.0, "step": 5236 }, { "epoch": 1.7875064004096262, "grad_norm": 0.23210081502335803, "learning_rate": 2.2451947395042996e-05, "loss": 0.2853, "num_tokens": 999862891.0, "step": 5237 }, { "epoch": 1.787847755589691, "grad_norm": 0.22126084625578593, "learning_rate": 2.244562468386444e-05, "loss": 0.2788, "num_tokens": 1000040139.0, "step": 5238 }, { "epoch": 1.788189110769756, "grad_norm": 0.2038267515588224, "learning_rate": 2.243930197268589e-05, "loss": 0.2906, "num_tokens": 1000226951.0, "step": 5239 }, { "epoch": 1.7885304659498207, "grad_norm": 0.22717400816719563, "learning_rate": 2.2432979261507337e-05, "loss": 0.3005, "num_tokens": 1000429588.0, "step": 5240 }, { "epoch": 1.7888718211298857, "grad_norm": 0.2063294481914824, "learning_rate": 2.242665655032878e-05, "loss": 0.3203, "num_tokens": 1000600250.0, "step": 5241 }, { "epoch": 1.7892131763099504, "grad_norm": 0.2799142183382836, "learning_rate": 2.2420333839150228e-05, "loss": 0.3258, "num_tokens": 1000767240.0, "step": 5242 }, { "epoch": 1.7895545314900154, "grad_norm": 0.23104842630474834, "learning_rate": 2.2414011127971675e-05, "loss": 0.3474, "num_tokens": 1000972200.0, "step": 5243 }, { "epoch": 1.7898958866700803, "grad_norm": 0.20033204273609953, "learning_rate": 2.2407688416793122e-05, "loss": 0.3028, "num_tokens": 1001154597.0, "step": 5244 }, { "epoch": 1.790237241850145, "grad_norm": 0.2728170871934456, "learning_rate": 2.240136570561457e-05, "loss": 0.31, "num_tokens": 1001332003.0, "step": 5245 }, { "epoch": 1.79057859703021, "grad_norm": 0.1950987198750671, "learning_rate": 2.2395042994436016e-05, "loss": 0.2591, "num_tokens": 1001520407.0, "step": 5246 }, { "epoch": 1.7909199522102748, "grad_norm": 0.22658425490610623, "learning_rate": 2.238872028325746e-05, "loss": 0.313, "num_tokens": 1001703573.0, "step": 5247 }, { "epoch": 1.7912613073903396, "grad_norm": 0.20173289310768777, "learning_rate": 2.238239757207891e-05, "loss": 0.3187, "num_tokens": 1001904570.0, "step": 5248 }, { "epoch": 1.7916026625704045, "grad_norm": 0.21015553987253272, "learning_rate": 2.2376074860900357e-05, "loss": 0.3395, "num_tokens": 1002105198.0, "step": 5249 }, { "epoch": 1.7919440177504695, "grad_norm": 0.22670704934179933, "learning_rate": 2.23697521497218e-05, "loss": 0.3434, "num_tokens": 1002278360.0, "step": 5250 }, { "epoch": 1.7922853729305341, "grad_norm": 0.21995496130768122, "learning_rate": 2.2363429438543247e-05, "loss": 0.3107, "num_tokens": 1002443221.0, "step": 5251 }, { "epoch": 1.7926267281105992, "grad_norm": 0.21282353760426062, "learning_rate": 2.2357106727364698e-05, "loss": 0.3065, "num_tokens": 1002655160.0, "step": 5252 }, { "epoch": 1.7929680832906638, "grad_norm": 0.22893093907772485, "learning_rate": 2.235078401618614e-05, "loss": 0.2939, "num_tokens": 1002827944.0, "step": 5253 }, { "epoch": 1.7933094384707289, "grad_norm": 0.23503517409245303, "learning_rate": 2.234446130500759e-05, "loss": 0.2889, "num_tokens": 1003003244.0, "step": 5254 }, { "epoch": 1.7936507936507935, "grad_norm": 0.19839981330944118, "learning_rate": 2.2338138593829035e-05, "loss": 0.3044, "num_tokens": 1003184665.0, "step": 5255 }, { "epoch": 1.7939921488308586, "grad_norm": 0.20930644790242522, "learning_rate": 2.2331815882650482e-05, "loss": 0.3073, "num_tokens": 1003407750.0, "step": 5256 }, { "epoch": 1.7943335040109234, "grad_norm": 0.2051378855952983, "learning_rate": 2.232549317147193e-05, "loss": 0.3044, "num_tokens": 1003604434.0, "step": 5257 }, { "epoch": 1.7946748591909882, "grad_norm": 0.2293711151797761, "learning_rate": 2.2319170460293376e-05, "loss": 0.312, "num_tokens": 1003766302.0, "step": 5258 }, { "epoch": 1.795016214371053, "grad_norm": 0.20376167368036208, "learning_rate": 2.231284774911482e-05, "loss": 0.3035, "num_tokens": 1003939841.0, "step": 5259 }, { "epoch": 1.795357569551118, "grad_norm": 0.21979270243804838, "learning_rate": 2.2306525037936267e-05, "loss": 0.3038, "num_tokens": 1004112200.0, "step": 5260 }, { "epoch": 1.7956989247311828, "grad_norm": 0.23194124230440114, "learning_rate": 2.2300202326757717e-05, "loss": 0.2909, "num_tokens": 1004255878.0, "step": 5261 }, { "epoch": 1.7960402799112476, "grad_norm": 0.2507449084820506, "learning_rate": 2.229387961557916e-05, "loss": 0.3186, "num_tokens": 1004440086.0, "step": 5262 }, { "epoch": 1.7963816350913127, "grad_norm": 0.22126792656781805, "learning_rate": 2.2287556904400608e-05, "loss": 0.3157, "num_tokens": 1004619423.0, "step": 5263 }, { "epoch": 1.7967229902713773, "grad_norm": 0.21536929025847631, "learning_rate": 2.2281234193222055e-05, "loss": 0.3222, "num_tokens": 1004821464.0, "step": 5264 }, { "epoch": 1.7970643454514423, "grad_norm": 0.23844232253342346, "learning_rate": 2.2274911482043502e-05, "loss": 0.3105, "num_tokens": 1004994455.0, "step": 5265 }, { "epoch": 1.797405700631507, "grad_norm": 0.21197053166055843, "learning_rate": 2.226858877086495e-05, "loss": 0.3165, "num_tokens": 1005169614.0, "step": 5266 }, { "epoch": 1.797747055811572, "grad_norm": 0.23419758866318288, "learning_rate": 2.2262266059686396e-05, "loss": 0.2993, "num_tokens": 1005374680.0, "step": 5267 }, { "epoch": 1.7980884109916366, "grad_norm": 0.20409696648527276, "learning_rate": 2.225594334850784e-05, "loss": 0.3013, "num_tokens": 1005569213.0, "step": 5268 }, { "epoch": 1.7984297661717017, "grad_norm": 0.3352020414438965, "learning_rate": 2.224962063732929e-05, "loss": 0.3055, "num_tokens": 1005764749.0, "step": 5269 }, { "epoch": 1.7987711213517665, "grad_norm": 0.21168392751512582, "learning_rate": 2.2243297926150737e-05, "loss": 0.2843, "num_tokens": 1005939050.0, "step": 5270 }, { "epoch": 1.7991124765318314, "grad_norm": 0.2300719008507104, "learning_rate": 2.223697521497218e-05, "loss": 0.2997, "num_tokens": 1006123503.0, "step": 5271 }, { "epoch": 1.7994538317118962, "grad_norm": 0.22119377925240363, "learning_rate": 2.2230652503793627e-05, "loss": 0.3172, "num_tokens": 1006299436.0, "step": 5272 }, { "epoch": 1.799795186891961, "grad_norm": 0.23741482639003128, "learning_rate": 2.2224329792615074e-05, "loss": 0.3137, "num_tokens": 1006473129.0, "step": 5273 }, { "epoch": 1.800136542072026, "grad_norm": 0.23923551818375197, "learning_rate": 2.221800708143652e-05, "loss": 0.2928, "num_tokens": 1006618468.0, "step": 5274 }, { "epoch": 1.8004778972520907, "grad_norm": 0.24175623690446524, "learning_rate": 2.2211684370257968e-05, "loss": 0.284, "num_tokens": 1006782450.0, "step": 5275 }, { "epoch": 1.8008192524321558, "grad_norm": 0.22382240507186907, "learning_rate": 2.2205361659079415e-05, "loss": 0.2886, "num_tokens": 1006972177.0, "step": 5276 }, { "epoch": 1.8011606076122204, "grad_norm": 0.1863772245987612, "learning_rate": 2.219903894790086e-05, "loss": 0.3152, "num_tokens": 1007153866.0, "step": 5277 }, { "epoch": 1.8015019627922855, "grad_norm": 0.24237814092635115, "learning_rate": 2.219271623672231e-05, "loss": 0.3042, "num_tokens": 1007336287.0, "step": 5278 }, { "epoch": 1.80184331797235, "grad_norm": 0.21931923522476937, "learning_rate": 2.2186393525543756e-05, "loss": 0.2872, "num_tokens": 1007484692.0, "step": 5279 }, { "epoch": 1.8021846731524152, "grad_norm": 0.2361962435212055, "learning_rate": 2.21800708143652e-05, "loss": 0.2945, "num_tokens": 1007662621.0, "step": 5280 }, { "epoch": 1.80252602833248, "grad_norm": 0.20353383721034757, "learning_rate": 2.2173748103186647e-05, "loss": 0.3086, "num_tokens": 1007829229.0, "step": 5281 }, { "epoch": 1.8028673835125448, "grad_norm": 0.22870909694415778, "learning_rate": 2.2167425392008094e-05, "loss": 0.3223, "num_tokens": 1007990959.0, "step": 5282 }, { "epoch": 1.8032087386926097, "grad_norm": 0.24980849370697022, "learning_rate": 2.216110268082954e-05, "loss": 0.2934, "num_tokens": 1008153219.0, "step": 5283 }, { "epoch": 1.8035500938726745, "grad_norm": 0.21330603161948825, "learning_rate": 2.2154779969650988e-05, "loss": 0.2811, "num_tokens": 1008347595.0, "step": 5284 }, { "epoch": 1.8038914490527393, "grad_norm": 0.2424763626257612, "learning_rate": 2.2148457258472435e-05, "loss": 0.3233, "num_tokens": 1008525286.0, "step": 5285 }, { "epoch": 1.8042328042328042, "grad_norm": 0.2071930695733912, "learning_rate": 2.214213454729388e-05, "loss": 0.2798, "num_tokens": 1008647774.0, "step": 5286 }, { "epoch": 1.8045741594128692, "grad_norm": 0.25415791650419706, "learning_rate": 2.213581183611533e-05, "loss": 0.3214, "num_tokens": 1008823651.0, "step": 5287 }, { "epoch": 1.8049155145929339, "grad_norm": 0.20295628845622074, "learning_rate": 2.2129489124936776e-05, "loss": 0.3104, "num_tokens": 1009032846.0, "step": 5288 }, { "epoch": 1.805256869772999, "grad_norm": 0.22500113598355154, "learning_rate": 2.212316641375822e-05, "loss": 0.2878, "num_tokens": 1009207900.0, "step": 5289 }, { "epoch": 1.8055982249530635, "grad_norm": 0.20756576749223443, "learning_rate": 2.2116843702579666e-05, "loss": 0.3134, "num_tokens": 1009389762.0, "step": 5290 }, { "epoch": 1.8059395801331286, "grad_norm": 0.22006992333275882, "learning_rate": 2.2110520991401117e-05, "loss": 0.2973, "num_tokens": 1009587853.0, "step": 5291 }, { "epoch": 1.8062809353131932, "grad_norm": 0.22151134337674835, "learning_rate": 2.210419828022256e-05, "loss": 0.3432, "num_tokens": 1009791241.0, "step": 5292 }, { "epoch": 1.8066222904932583, "grad_norm": 0.22074227798454543, "learning_rate": 2.2097875569044007e-05, "loss": 0.283, "num_tokens": 1009959572.0, "step": 5293 }, { "epoch": 1.8069636456733231, "grad_norm": 0.2151680932941361, "learning_rate": 2.2091552857865454e-05, "loss": 0.3128, "num_tokens": 1010166605.0, "step": 5294 }, { "epoch": 1.807305000853388, "grad_norm": 0.23655162596036788, "learning_rate": 2.20852301466869e-05, "loss": 0.3222, "num_tokens": 1010352083.0, "step": 5295 }, { "epoch": 1.8076463560334528, "grad_norm": 0.2454325797765781, "learning_rate": 2.2078907435508348e-05, "loss": 0.3447, "num_tokens": 1010515503.0, "step": 5296 }, { "epoch": 1.8079877112135176, "grad_norm": 0.1928351059938827, "learning_rate": 2.2072584724329792e-05, "loss": 0.299, "num_tokens": 1010703134.0, "step": 5297 }, { "epoch": 1.8083290663935825, "grad_norm": 0.19399321025610589, "learning_rate": 2.206626201315124e-05, "loss": 0.3136, "num_tokens": 1010910336.0, "step": 5298 }, { "epoch": 1.8086704215736473, "grad_norm": 0.234017953651915, "learning_rate": 2.205993930197269e-05, "loss": 0.2872, "num_tokens": 1011065166.0, "step": 5299 }, { "epoch": 1.8090117767537124, "grad_norm": 0.20583653878879504, "learning_rate": 2.2053616590794133e-05, "loss": 0.2998, "num_tokens": 1011262691.0, "step": 5300 }, { "epoch": 1.809353131933777, "grad_norm": 0.2124462613333516, "learning_rate": 2.204729387961558e-05, "loss": 0.2843, "num_tokens": 1011437389.0, "step": 5301 }, { "epoch": 1.809694487113842, "grad_norm": 0.22417206352693517, "learning_rate": 2.2040971168437027e-05, "loss": 0.3102, "num_tokens": 1011595841.0, "step": 5302 }, { "epoch": 1.8100358422939067, "grad_norm": 0.2280196840736011, "learning_rate": 2.2034648457258474e-05, "loss": 0.3271, "num_tokens": 1011785979.0, "step": 5303 }, { "epoch": 1.8103771974739717, "grad_norm": 0.2237091783596457, "learning_rate": 2.202832574607992e-05, "loss": 0.3198, "num_tokens": 1011985114.0, "step": 5304 }, { "epoch": 1.8107185526540364, "grad_norm": 0.20036152849971864, "learning_rate": 2.2022003034901368e-05, "loss": 0.2891, "num_tokens": 1012217420.0, "step": 5305 }, { "epoch": 1.8110599078341014, "grad_norm": 0.20563988298107136, "learning_rate": 2.201568032372281e-05, "loss": 0.2836, "num_tokens": 1012391120.0, "step": 5306 }, { "epoch": 1.8114012630141663, "grad_norm": 0.1923007144130646, "learning_rate": 2.2009357612544258e-05, "loss": 0.3048, "num_tokens": 1012590272.0, "step": 5307 }, { "epoch": 1.811742618194231, "grad_norm": 0.21167091304928073, "learning_rate": 2.200303490136571e-05, "loss": 0.3011, "num_tokens": 1012804460.0, "step": 5308 }, { "epoch": 1.812083973374296, "grad_norm": 0.1995795724329373, "learning_rate": 2.1996712190187152e-05, "loss": 0.3017, "num_tokens": 1012968504.0, "step": 5309 }, { "epoch": 1.8124253285543608, "grad_norm": 0.2271433818581005, "learning_rate": 2.19903894790086e-05, "loss": 0.3141, "num_tokens": 1013183367.0, "step": 5310 }, { "epoch": 1.8127666837344256, "grad_norm": 0.23642367133739753, "learning_rate": 2.1984066767830046e-05, "loss": 0.3313, "num_tokens": 1013371995.0, "step": 5311 }, { "epoch": 1.8131080389144905, "grad_norm": 0.2062625703892341, "learning_rate": 2.1977744056651493e-05, "loss": 0.2851, "num_tokens": 1013520925.0, "step": 5312 }, { "epoch": 1.8134493940945555, "grad_norm": 0.22937826946384013, "learning_rate": 2.197142134547294e-05, "loss": 0.2986, "num_tokens": 1013689352.0, "step": 5313 }, { "epoch": 1.8137907492746201, "grad_norm": 0.2078984892598994, "learning_rate": 2.1965098634294387e-05, "loss": 0.2999, "num_tokens": 1013907259.0, "step": 5314 }, { "epoch": 1.8141321044546852, "grad_norm": 0.20572903459973196, "learning_rate": 2.195877592311583e-05, "loss": 0.3128, "num_tokens": 1014111730.0, "step": 5315 }, { "epoch": 1.8144734596347498, "grad_norm": 0.22623480993973547, "learning_rate": 2.195245321193728e-05, "loss": 0.3027, "num_tokens": 1014265279.0, "step": 5316 }, { "epoch": 1.8148148148148149, "grad_norm": 0.21411418472115817, "learning_rate": 2.1946130500758728e-05, "loss": 0.3167, "num_tokens": 1014468744.0, "step": 5317 }, { "epoch": 1.8151561699948797, "grad_norm": 0.22415361045783946, "learning_rate": 2.193980778958017e-05, "loss": 0.2859, "num_tokens": 1014662937.0, "step": 5318 }, { "epoch": 1.8154975251749446, "grad_norm": 0.22242726879753902, "learning_rate": 2.193348507840162e-05, "loss": 0.296, "num_tokens": 1014845104.0, "step": 5319 }, { "epoch": 1.8158388803550094, "grad_norm": 0.20264626787208767, "learning_rate": 2.1927162367223066e-05, "loss": 0.3227, "num_tokens": 1015026695.0, "step": 5320 }, { "epoch": 1.8161802355350742, "grad_norm": 0.20971844302885306, "learning_rate": 2.1920839656044513e-05, "loss": 0.2943, "num_tokens": 1015222790.0, "step": 5321 }, { "epoch": 1.816521590715139, "grad_norm": 0.23300567025055363, "learning_rate": 2.191451694486596e-05, "loss": 0.2769, "num_tokens": 1015391339.0, "step": 5322 }, { "epoch": 1.816862945895204, "grad_norm": 0.20812311861485078, "learning_rate": 2.1908194233687407e-05, "loss": 0.2966, "num_tokens": 1015579613.0, "step": 5323 }, { "epoch": 1.817204301075269, "grad_norm": 0.22320693805530342, "learning_rate": 2.190187152250885e-05, "loss": 0.2884, "num_tokens": 1015744752.0, "step": 5324 }, { "epoch": 1.8175456562553336, "grad_norm": 0.22787887137373858, "learning_rate": 2.18955488113303e-05, "loss": 0.2897, "num_tokens": 1015951520.0, "step": 5325 }, { "epoch": 1.8178870114353987, "grad_norm": 0.18467720533482332, "learning_rate": 2.1889226100151747e-05, "loss": 0.3199, "num_tokens": 1016146722.0, "step": 5326 }, { "epoch": 1.8182283666154633, "grad_norm": 0.2464030560627981, "learning_rate": 2.188290338897319e-05, "loss": 0.2951, "num_tokens": 1016325510.0, "step": 5327 }, { "epoch": 1.8185697217955283, "grad_norm": 0.2017936072979188, "learning_rate": 2.1876580677794638e-05, "loss": 0.3051, "num_tokens": 1016545689.0, "step": 5328 }, { "epoch": 1.818911076975593, "grad_norm": 0.19013162080302104, "learning_rate": 2.187025796661609e-05, "loss": 0.3336, "num_tokens": 1016751103.0, "step": 5329 }, { "epoch": 1.819252432155658, "grad_norm": 0.2125450880078978, "learning_rate": 2.1863935255437532e-05, "loss": 0.3119, "num_tokens": 1016938711.0, "step": 5330 }, { "epoch": 1.8195937873357229, "grad_norm": 0.2070512513815564, "learning_rate": 2.185761254425898e-05, "loss": 0.3172, "num_tokens": 1017161610.0, "step": 5331 }, { "epoch": 1.8199351425157877, "grad_norm": 0.22339700581941052, "learning_rate": 2.1851289833080426e-05, "loss": 0.2983, "num_tokens": 1017372621.0, "step": 5332 }, { "epoch": 1.8202764976958525, "grad_norm": 0.19881426920861545, "learning_rate": 2.1844967121901873e-05, "loss": 0.3077, "num_tokens": 1017554187.0, "step": 5333 }, { "epoch": 1.8206178528759174, "grad_norm": 0.22810776263349294, "learning_rate": 2.183864441072332e-05, "loss": 0.2788, "num_tokens": 1017732579.0, "step": 5334 }, { "epoch": 1.8209592080559822, "grad_norm": 0.260755842273014, "learning_rate": 2.1832321699544767e-05, "loss": 0.3547, "num_tokens": 1017929891.0, "step": 5335 }, { "epoch": 1.821300563236047, "grad_norm": 0.21828961985827022, "learning_rate": 2.182599898836621e-05, "loss": 0.286, "num_tokens": 1018096604.0, "step": 5336 }, { "epoch": 1.8216419184161121, "grad_norm": 0.2230697840231562, "learning_rate": 2.1819676277187657e-05, "loss": 0.3248, "num_tokens": 1018307728.0, "step": 5337 }, { "epoch": 1.8219832735961767, "grad_norm": 0.22378853036048252, "learning_rate": 2.1813353566009108e-05, "loss": 0.284, "num_tokens": 1018476953.0, "step": 5338 }, { "epoch": 1.8223246287762418, "grad_norm": 0.22292912184248276, "learning_rate": 2.180703085483055e-05, "loss": 0.2978, "num_tokens": 1018667111.0, "step": 5339 }, { "epoch": 1.8226659839563064, "grad_norm": 0.2020543940039999, "learning_rate": 2.1800708143652e-05, "loss": 0.2997, "num_tokens": 1018840410.0, "step": 5340 }, { "epoch": 1.8230073391363715, "grad_norm": 0.23419053417434238, "learning_rate": 2.1794385432473445e-05, "loss": 0.2996, "num_tokens": 1019062033.0, "step": 5341 }, { "epoch": 1.823348694316436, "grad_norm": 0.20725436935174826, "learning_rate": 2.1788062721294892e-05, "loss": 0.2967, "num_tokens": 1019234473.0, "step": 5342 }, { "epoch": 1.8236900494965012, "grad_norm": 0.1983394577094136, "learning_rate": 2.178174001011634e-05, "loss": 0.319, "num_tokens": 1019455487.0, "step": 5343 }, { "epoch": 1.824031404676566, "grad_norm": 0.20687117413572237, "learning_rate": 2.1775417298937786e-05, "loss": 0.3041, "num_tokens": 1019646581.0, "step": 5344 }, { "epoch": 1.8243727598566308, "grad_norm": 0.2106247766563037, "learning_rate": 2.176909458775923e-05, "loss": 0.2763, "num_tokens": 1019821363.0, "step": 5345 }, { "epoch": 1.8247141150366957, "grad_norm": 0.22256320283623968, "learning_rate": 2.176277187658068e-05, "loss": 0.2868, "num_tokens": 1020007148.0, "step": 5346 }, { "epoch": 1.8250554702167605, "grad_norm": 0.2006353739317664, "learning_rate": 2.1756449165402127e-05, "loss": 0.2979, "num_tokens": 1020214984.0, "step": 5347 }, { "epoch": 1.8253968253968254, "grad_norm": 0.25418765495907486, "learning_rate": 2.175012645422357e-05, "loss": 0.2947, "num_tokens": 1020403150.0, "step": 5348 }, { "epoch": 1.8257381805768902, "grad_norm": 0.20930121716091246, "learning_rate": 2.1743803743045018e-05, "loss": 0.2602, "num_tokens": 1020582086.0, "step": 5349 }, { "epoch": 1.8260795357569553, "grad_norm": 0.1970086281570496, "learning_rate": 2.1737481031866465e-05, "loss": 0.3317, "num_tokens": 1020791957.0, "step": 5350 }, { "epoch": 1.8264208909370199, "grad_norm": 0.2571809133183685, "learning_rate": 2.1731158320687912e-05, "loss": 0.2993, "num_tokens": 1020978531.0, "step": 5351 }, { "epoch": 1.826762246117085, "grad_norm": 0.19149026368444189, "learning_rate": 2.172483560950936e-05, "loss": 0.2926, "num_tokens": 1021186111.0, "step": 5352 }, { "epoch": 1.8271036012971495, "grad_norm": 0.2326893892702177, "learning_rate": 2.1718512898330806e-05, "loss": 0.2874, "num_tokens": 1021327934.0, "step": 5353 }, { "epoch": 1.8274449564772146, "grad_norm": 0.22928388033555044, "learning_rate": 2.171219018715225e-05, "loss": 0.3101, "num_tokens": 1021549593.0, "step": 5354 }, { "epoch": 1.8277863116572794, "grad_norm": 0.18948001023031963, "learning_rate": 2.17058674759737e-05, "loss": 0.2973, "num_tokens": 1021762844.0, "step": 5355 }, { "epoch": 1.8281276668373443, "grad_norm": 0.2213633790239296, "learning_rate": 2.1699544764795147e-05, "loss": 0.3088, "num_tokens": 1021980348.0, "step": 5356 }, { "epoch": 1.8284690220174091, "grad_norm": 0.229564423566188, "learning_rate": 2.169322205361659e-05, "loss": 0.3201, "num_tokens": 1022163203.0, "step": 5357 }, { "epoch": 1.828810377197474, "grad_norm": 0.23282927841970646, "learning_rate": 2.1686899342438037e-05, "loss": 0.2717, "num_tokens": 1022330166.0, "step": 5358 }, { "epoch": 1.8291517323775388, "grad_norm": 0.19987809818281718, "learning_rate": 2.1680576631259484e-05, "loss": 0.2869, "num_tokens": 1022505004.0, "step": 5359 }, { "epoch": 1.8294930875576036, "grad_norm": 0.2174897628444138, "learning_rate": 2.167425392008093e-05, "loss": 0.3209, "num_tokens": 1022689231.0, "step": 5360 }, { "epoch": 1.8298344427376687, "grad_norm": 0.22661338575700266, "learning_rate": 2.166793120890238e-05, "loss": 0.3036, "num_tokens": 1022877399.0, "step": 5361 }, { "epoch": 1.8301757979177333, "grad_norm": 0.22945874430010368, "learning_rate": 2.1661608497723825e-05, "loss": 0.3118, "num_tokens": 1023063429.0, "step": 5362 }, { "epoch": 1.8305171530977984, "grad_norm": 0.22457660391581802, "learning_rate": 2.1655285786545272e-05, "loss": 0.2993, "num_tokens": 1023264275.0, "step": 5363 }, { "epoch": 1.830858508277863, "grad_norm": 0.22725790836438145, "learning_rate": 2.164896307536672e-05, "loss": 0.3172, "num_tokens": 1023484160.0, "step": 5364 }, { "epoch": 1.831199863457928, "grad_norm": 0.1908826388372762, "learning_rate": 2.1642640364188166e-05, "loss": 0.3154, "num_tokens": 1023689257.0, "step": 5365 }, { "epoch": 1.8315412186379927, "grad_norm": 0.2234201539634765, "learning_rate": 2.163631765300961e-05, "loss": 0.3213, "num_tokens": 1023899341.0, "step": 5366 }, { "epoch": 1.8318825738180577, "grad_norm": 0.22143854115362455, "learning_rate": 2.1629994941831057e-05, "loss": 0.3122, "num_tokens": 1024090575.0, "step": 5367 }, { "epoch": 1.8322239289981226, "grad_norm": 0.21529612227673695, "learning_rate": 2.1623672230652507e-05, "loss": 0.3001, "num_tokens": 1024288136.0, "step": 5368 }, { "epoch": 1.8325652841781874, "grad_norm": 0.22127979393406896, "learning_rate": 2.161734951947395e-05, "loss": 0.2967, "num_tokens": 1024512185.0, "step": 5369 }, { "epoch": 1.8329066393582523, "grad_norm": 0.19724674858616775, "learning_rate": 2.1611026808295398e-05, "loss": 0.295, "num_tokens": 1024676813.0, "step": 5370 }, { "epoch": 1.833247994538317, "grad_norm": 0.2466249571569744, "learning_rate": 2.1604704097116845e-05, "loss": 0.3046, "num_tokens": 1024854896.0, "step": 5371 }, { "epoch": 1.833589349718382, "grad_norm": 0.2104861753038104, "learning_rate": 2.1598381385938292e-05, "loss": 0.3133, "num_tokens": 1025017238.0, "step": 5372 }, { "epoch": 1.8339307048984468, "grad_norm": 0.24098689475950674, "learning_rate": 2.159205867475974e-05, "loss": 0.3314, "num_tokens": 1025204282.0, "step": 5373 }, { "epoch": 1.8342720600785118, "grad_norm": 0.2068412520702609, "learning_rate": 2.1585735963581186e-05, "loss": 0.3256, "num_tokens": 1025419265.0, "step": 5374 }, { "epoch": 1.8346134152585765, "grad_norm": 0.21060996011281294, "learning_rate": 2.157941325240263e-05, "loss": 0.2874, "num_tokens": 1025612576.0, "step": 5375 }, { "epoch": 1.8349547704386415, "grad_norm": 0.1942570844312659, "learning_rate": 2.157309054122408e-05, "loss": 0.2927, "num_tokens": 1025804343.0, "step": 5376 }, { "epoch": 1.8352961256187061, "grad_norm": 0.23804732089377031, "learning_rate": 2.1566767830045527e-05, "loss": 0.298, "num_tokens": 1025959628.0, "step": 5377 }, { "epoch": 1.8356374807987712, "grad_norm": 0.24081073704337408, "learning_rate": 2.156044511886697e-05, "loss": 0.277, "num_tokens": 1026117202.0, "step": 5378 }, { "epoch": 1.8359788359788358, "grad_norm": 0.1992478756689658, "learning_rate": 2.1554122407688417e-05, "loss": 0.2903, "num_tokens": 1026308046.0, "step": 5379 }, { "epoch": 1.8363201911589009, "grad_norm": 0.22952799101250768, "learning_rate": 2.1547799696509864e-05, "loss": 0.296, "num_tokens": 1026507881.0, "step": 5380 }, { "epoch": 1.8366615463389657, "grad_norm": 0.19149983552676378, "learning_rate": 2.154147698533131e-05, "loss": 0.3199, "num_tokens": 1026753807.0, "step": 5381 }, { "epoch": 1.8370029015190306, "grad_norm": 0.19874330912992805, "learning_rate": 2.1535154274152758e-05, "loss": 0.2593, "num_tokens": 1026903943.0, "step": 5382 }, { "epoch": 1.8373442566990954, "grad_norm": 0.25574708860369916, "learning_rate": 2.1528831562974205e-05, "loss": 0.3359, "num_tokens": 1027069668.0, "step": 5383 }, { "epoch": 1.8376856118791602, "grad_norm": 0.20485358789690164, "learning_rate": 2.152250885179565e-05, "loss": 0.2855, "num_tokens": 1027285550.0, "step": 5384 }, { "epoch": 1.838026967059225, "grad_norm": 0.20882495819422517, "learning_rate": 2.15161861406171e-05, "loss": 0.3099, "num_tokens": 1027487302.0, "step": 5385 }, { "epoch": 1.83836832223929, "grad_norm": 0.20339789070895817, "learning_rate": 2.1509863429438546e-05, "loss": 0.3311, "num_tokens": 1027687796.0, "step": 5386 }, { "epoch": 1.838709677419355, "grad_norm": 0.23076613650998723, "learning_rate": 2.150354071825999e-05, "loss": 0.2822, "num_tokens": 1027859231.0, "step": 5387 }, { "epoch": 1.8390510325994196, "grad_norm": 0.2055866100581792, "learning_rate": 2.1497218007081437e-05, "loss": 0.2768, "num_tokens": 1028026192.0, "step": 5388 }, { "epoch": 1.8393923877794847, "grad_norm": 0.21049728966499145, "learning_rate": 2.1490895295902884e-05, "loss": 0.3156, "num_tokens": 1028250639.0, "step": 5389 }, { "epoch": 1.8397337429595493, "grad_norm": 0.2827336070103864, "learning_rate": 2.148457258472433e-05, "loss": 0.3109, "num_tokens": 1028443088.0, "step": 5390 }, { "epoch": 1.8400750981396143, "grad_norm": 0.21533588554102154, "learning_rate": 2.1478249873545778e-05, "loss": 0.298, "num_tokens": 1028627705.0, "step": 5391 }, { "epoch": 1.8404164533196792, "grad_norm": 0.22404286520944103, "learning_rate": 2.1471927162367225e-05, "loss": 0.3375, "num_tokens": 1028822470.0, "step": 5392 }, { "epoch": 1.840757808499744, "grad_norm": 0.2422649685271615, "learning_rate": 2.146560445118867e-05, "loss": 0.3012, "num_tokens": 1028988401.0, "step": 5393 }, { "epoch": 1.8410991636798089, "grad_norm": 0.23010608669057456, "learning_rate": 2.145928174001012e-05, "loss": 0.2997, "num_tokens": 1029179904.0, "step": 5394 }, { "epoch": 1.8414405188598737, "grad_norm": 0.21370255380337932, "learning_rate": 2.1452959028831566e-05, "loss": 0.331, "num_tokens": 1029403129.0, "step": 5395 }, { "epoch": 1.8417818740399385, "grad_norm": 0.2140561171831577, "learning_rate": 2.144663631765301e-05, "loss": 0.3176, "num_tokens": 1029613741.0, "step": 5396 }, { "epoch": 1.8421232292200034, "grad_norm": 0.18656561102354013, "learning_rate": 2.1440313606474456e-05, "loss": 0.3008, "num_tokens": 1029801440.0, "step": 5397 }, { "epoch": 1.8424645844000682, "grad_norm": 0.22222667897102807, "learning_rate": 2.1433990895295907e-05, "loss": 0.2659, "num_tokens": 1029983020.0, "step": 5398 }, { "epoch": 1.842805939580133, "grad_norm": 0.23304279925183535, "learning_rate": 2.142766818411735e-05, "loss": 0.3018, "num_tokens": 1030166050.0, "step": 5399 }, { "epoch": 1.8431472947601981, "grad_norm": 0.21256729077890757, "learning_rate": 2.1421345472938797e-05, "loss": 0.3052, "num_tokens": 1030349165.0, "step": 5400 }, { "epoch": 1.8434886499402627, "grad_norm": 0.21576865241643206, "learning_rate": 2.1415022761760244e-05, "loss": 0.3139, "num_tokens": 1030579979.0, "step": 5401 }, { "epoch": 1.8438300051203278, "grad_norm": 0.20792388133209475, "learning_rate": 2.140870005058169e-05, "loss": 0.2906, "num_tokens": 1030770833.0, "step": 5402 }, { "epoch": 1.8441713603003924, "grad_norm": 0.21370631591121578, "learning_rate": 2.1402377339403138e-05, "loss": 0.2785, "num_tokens": 1030926203.0, "step": 5403 }, { "epoch": 1.8445127154804575, "grad_norm": 0.22029019627812138, "learning_rate": 2.1396054628224585e-05, "loss": 0.2991, "num_tokens": 1031102363.0, "step": 5404 }, { "epoch": 1.8448540706605223, "grad_norm": 0.22338807975249236, "learning_rate": 2.138973191704603e-05, "loss": 0.3356, "num_tokens": 1031331545.0, "step": 5405 }, { "epoch": 1.8451954258405872, "grad_norm": 0.20353567963070762, "learning_rate": 2.138340920586748e-05, "loss": 0.3152, "num_tokens": 1031512886.0, "step": 5406 }, { "epoch": 1.845536781020652, "grad_norm": 0.20639708421665612, "learning_rate": 2.1377086494688926e-05, "loss": 0.3299, "num_tokens": 1031716662.0, "step": 5407 }, { "epoch": 1.8458781362007168, "grad_norm": 0.24005075303886578, "learning_rate": 2.137076378351037e-05, "loss": 0.2857, "num_tokens": 1031884047.0, "step": 5408 }, { "epoch": 1.8462194913807817, "grad_norm": 0.20368099090235764, "learning_rate": 2.1364441072331817e-05, "loss": 0.2891, "num_tokens": 1032084416.0, "step": 5409 }, { "epoch": 1.8465608465608465, "grad_norm": 0.2260474845967616, "learning_rate": 2.1358118361153264e-05, "loss": 0.3227, "num_tokens": 1032291566.0, "step": 5410 }, { "epoch": 1.8469022017409116, "grad_norm": 0.20703716152534285, "learning_rate": 2.135179564997471e-05, "loss": 0.3337, "num_tokens": 1032490788.0, "step": 5411 }, { "epoch": 1.8472435569209762, "grad_norm": 0.2395109599400039, "learning_rate": 2.1345472938796158e-05, "loss": 0.2861, "num_tokens": 1032668669.0, "step": 5412 }, { "epoch": 1.8475849121010413, "grad_norm": 0.22257773382274038, "learning_rate": 2.1339150227617604e-05, "loss": 0.2989, "num_tokens": 1032840336.0, "step": 5413 }, { "epoch": 1.8479262672811059, "grad_norm": 0.2272695287934601, "learning_rate": 2.1332827516439048e-05, "loss": 0.299, "num_tokens": 1033047910.0, "step": 5414 }, { "epoch": 1.848267622461171, "grad_norm": 0.21589097852850278, "learning_rate": 2.13265048052605e-05, "loss": 0.3451, "num_tokens": 1033257663.0, "step": 5415 }, { "epoch": 1.8486089776412356, "grad_norm": 0.2708320645322161, "learning_rate": 2.1320182094081945e-05, "loss": 0.2806, "num_tokens": 1033441518.0, "step": 5416 }, { "epoch": 1.8489503328213006, "grad_norm": 0.21359926431778964, "learning_rate": 2.131385938290339e-05, "loss": 0.2755, "num_tokens": 1033594343.0, "step": 5417 }, { "epoch": 1.8492916880013655, "grad_norm": 0.23134816949041617, "learning_rate": 2.1307536671724836e-05, "loss": 0.2963, "num_tokens": 1033782533.0, "step": 5418 }, { "epoch": 1.8496330431814303, "grad_norm": 0.20804436724087796, "learning_rate": 2.1301213960546283e-05, "loss": 0.3179, "num_tokens": 1033976681.0, "step": 5419 }, { "epoch": 1.8499743983614951, "grad_norm": 0.20822636174964937, "learning_rate": 2.129489124936773e-05, "loss": 0.3006, "num_tokens": 1034167781.0, "step": 5420 }, { "epoch": 1.85031575354156, "grad_norm": 0.18081377142520333, "learning_rate": 2.1288568538189177e-05, "loss": 0.2944, "num_tokens": 1034369975.0, "step": 5421 }, { "epoch": 1.8506571087216248, "grad_norm": 0.2441896011396401, "learning_rate": 2.1282245827010624e-05, "loss": 0.3505, "num_tokens": 1034572934.0, "step": 5422 }, { "epoch": 1.8509984639016897, "grad_norm": 0.20001576921548644, "learning_rate": 2.127592311583207e-05, "loss": 0.3239, "num_tokens": 1034782156.0, "step": 5423 }, { "epoch": 1.8513398190817547, "grad_norm": 0.23126461541338772, "learning_rate": 2.1269600404653518e-05, "loss": 0.323, "num_tokens": 1034986802.0, "step": 5424 }, { "epoch": 1.8516811742618193, "grad_norm": 0.22309032020562097, "learning_rate": 2.1263277693474965e-05, "loss": 0.2933, "num_tokens": 1035160293.0, "step": 5425 }, { "epoch": 1.8520225294418844, "grad_norm": 0.21069747346450704, "learning_rate": 2.125695498229641e-05, "loss": 0.3021, "num_tokens": 1035348016.0, "step": 5426 }, { "epoch": 1.852363884621949, "grad_norm": 0.2507407187052223, "learning_rate": 2.1250632271117855e-05, "loss": 0.3229, "num_tokens": 1035513647.0, "step": 5427 }, { "epoch": 1.852705239802014, "grad_norm": 0.20875376454773942, "learning_rate": 2.1244309559939302e-05, "loss": 0.3087, "num_tokens": 1035714305.0, "step": 5428 }, { "epoch": 1.8530465949820787, "grad_norm": 0.23252488997943177, "learning_rate": 2.123798684876075e-05, "loss": 0.288, "num_tokens": 1035922601.0, "step": 5429 }, { "epoch": 1.8533879501621437, "grad_norm": 0.19474888561685533, "learning_rate": 2.1231664137582196e-05, "loss": 0.3224, "num_tokens": 1036128358.0, "step": 5430 }, { "epoch": 1.8537293053422086, "grad_norm": 0.21715302148272297, "learning_rate": 2.122534142640364e-05, "loss": 0.3248, "num_tokens": 1036302254.0, "step": 5431 }, { "epoch": 1.8540706605222734, "grad_norm": 0.20882470719836757, "learning_rate": 2.121901871522509e-05, "loss": 0.3094, "num_tokens": 1036488995.0, "step": 5432 }, { "epoch": 1.8544120157023383, "grad_norm": 0.23693807519766308, "learning_rate": 2.1212696004046537e-05, "loss": 0.2979, "num_tokens": 1036665408.0, "step": 5433 }, { "epoch": 1.854753370882403, "grad_norm": 0.1981985792231749, "learning_rate": 2.120637329286798e-05, "loss": 0.2907, "num_tokens": 1036833224.0, "step": 5434 }, { "epoch": 1.855094726062468, "grad_norm": 0.22626595905577435, "learning_rate": 2.1200050581689428e-05, "loss": 0.3548, "num_tokens": 1037068420.0, "step": 5435 }, { "epoch": 1.8554360812425328, "grad_norm": 0.24117450102586177, "learning_rate": 2.1193727870510875e-05, "loss": 0.2978, "num_tokens": 1037277528.0, "step": 5436 }, { "epoch": 1.8557774364225978, "grad_norm": 0.18803035257644338, "learning_rate": 2.1187405159332322e-05, "loss": 0.2975, "num_tokens": 1037480142.0, "step": 5437 }, { "epoch": 1.8561187916026625, "grad_norm": 0.21051879806528573, "learning_rate": 2.118108244815377e-05, "loss": 0.3123, "num_tokens": 1037680591.0, "step": 5438 }, { "epoch": 1.8564601467827275, "grad_norm": 0.2228692854896203, "learning_rate": 2.1174759736975216e-05, "loss": 0.3163, "num_tokens": 1037855073.0, "step": 5439 }, { "epoch": 1.8568015019627921, "grad_norm": 0.21471698210141077, "learning_rate": 2.1168437025796663e-05, "loss": 0.277, "num_tokens": 1038032851.0, "step": 5440 }, { "epoch": 1.8571428571428572, "grad_norm": 0.16865294645321016, "learning_rate": 2.116211431461811e-05, "loss": 0.3145, "num_tokens": 1038257208.0, "step": 5441 }, { "epoch": 1.857484212322922, "grad_norm": 0.20219885361077772, "learning_rate": 2.1155791603439557e-05, "loss": 0.3041, "num_tokens": 1038447965.0, "step": 5442 }, { "epoch": 1.8578255675029869, "grad_norm": 0.21896752544430303, "learning_rate": 2.1149468892261e-05, "loss": 0.3386, "num_tokens": 1038664316.0, "step": 5443 }, { "epoch": 1.8581669226830517, "grad_norm": 0.28864420980940503, "learning_rate": 2.1143146181082447e-05, "loss": 0.3104, "num_tokens": 1038868012.0, "step": 5444 }, { "epoch": 1.8585082778631166, "grad_norm": 0.1812636274487732, "learning_rate": 2.1136823469903898e-05, "loss": 0.2863, "num_tokens": 1039078194.0, "step": 5445 }, { "epoch": 1.8588496330431814, "grad_norm": 0.21901034393617405, "learning_rate": 2.113050075872534e-05, "loss": 0.3045, "num_tokens": 1039272121.0, "step": 5446 }, { "epoch": 1.8591909882232462, "grad_norm": 0.20331243506212268, "learning_rate": 2.112417804754679e-05, "loss": 0.2873, "num_tokens": 1039455378.0, "step": 5447 }, { "epoch": 1.8595323434033113, "grad_norm": 0.2037598028862444, "learning_rate": 2.1117855336368235e-05, "loss": 0.2822, "num_tokens": 1039649841.0, "step": 5448 }, { "epoch": 1.859873698583376, "grad_norm": 0.2181631431412717, "learning_rate": 2.1111532625189682e-05, "loss": 0.3046, "num_tokens": 1039840180.0, "step": 5449 }, { "epoch": 1.860215053763441, "grad_norm": 0.2343859863233426, "learning_rate": 2.110520991401113e-05, "loss": 0.3056, "num_tokens": 1040009878.0, "step": 5450 }, { "epoch": 1.8605564089435056, "grad_norm": 0.203929642982799, "learning_rate": 2.1098887202832576e-05, "loss": 0.297, "num_tokens": 1040189450.0, "step": 5451 }, { "epoch": 1.8608977641235707, "grad_norm": 0.2166457804019196, "learning_rate": 2.109256449165402e-05, "loss": 0.312, "num_tokens": 1040379378.0, "step": 5452 }, { "epoch": 1.8612391193036353, "grad_norm": 0.2213202833254624, "learning_rate": 2.108624178047547e-05, "loss": 0.3246, "num_tokens": 1040518602.0, "step": 5453 }, { "epoch": 1.8615804744837003, "grad_norm": 0.24226432255608446, "learning_rate": 2.1079919069296917e-05, "loss": 0.3075, "num_tokens": 1040703513.0, "step": 5454 }, { "epoch": 1.8619218296637652, "grad_norm": 0.19814421230458457, "learning_rate": 2.107359635811836e-05, "loss": 0.2639, "num_tokens": 1040876897.0, "step": 5455 }, { "epoch": 1.86226318484383, "grad_norm": 0.24123450166791682, "learning_rate": 2.1067273646939808e-05, "loss": 0.317, "num_tokens": 1041081624.0, "step": 5456 }, { "epoch": 1.8626045400238949, "grad_norm": 1.7519409997520092, "learning_rate": 2.1060950935761255e-05, "loss": 0.2993, "num_tokens": 1041245789.0, "step": 5457 }, { "epoch": 1.8629458952039597, "grad_norm": 0.23245014325599564, "learning_rate": 2.1054628224582702e-05, "loss": 0.3211, "num_tokens": 1041465307.0, "step": 5458 }, { "epoch": 1.8632872503840245, "grad_norm": 0.2102395310649149, "learning_rate": 2.104830551340415e-05, "loss": 0.2985, "num_tokens": 1041656979.0, "step": 5459 }, { "epoch": 1.8636286055640894, "grad_norm": 0.21302386801692186, "learning_rate": 2.1041982802225596e-05, "loss": 0.3037, "num_tokens": 1041822086.0, "step": 5460 }, { "epoch": 1.8639699607441544, "grad_norm": 0.20441491759015254, "learning_rate": 2.103566009104704e-05, "loss": 0.3101, "num_tokens": 1041998049.0, "step": 5461 }, { "epoch": 1.864311315924219, "grad_norm": 0.23776148351664675, "learning_rate": 2.102933737986849e-05, "loss": 0.2908, "num_tokens": 1042183197.0, "step": 5462 }, { "epoch": 1.8646526711042841, "grad_norm": 0.19637331379647155, "learning_rate": 2.1023014668689937e-05, "loss": 0.3099, "num_tokens": 1042383759.0, "step": 5463 }, { "epoch": 1.8649940262843487, "grad_norm": 0.20439309994660645, "learning_rate": 2.101669195751138e-05, "loss": 0.2917, "num_tokens": 1042569058.0, "step": 5464 }, { "epoch": 1.8653353814644138, "grad_norm": 0.23823847990578556, "learning_rate": 2.1010369246332827e-05, "loss": 0.2951, "num_tokens": 1042757489.0, "step": 5465 }, { "epoch": 1.8656767366444784, "grad_norm": 0.20007837897061628, "learning_rate": 2.1004046535154274e-05, "loss": 0.317, "num_tokens": 1042952496.0, "step": 5466 }, { "epoch": 1.8660180918245435, "grad_norm": 0.21989084308932394, "learning_rate": 2.099772382397572e-05, "loss": 0.3129, "num_tokens": 1043150760.0, "step": 5467 }, { "epoch": 1.8663594470046083, "grad_norm": 0.2162206002566986, "learning_rate": 2.0991401112797168e-05, "loss": 0.3102, "num_tokens": 1043333388.0, "step": 5468 }, { "epoch": 1.8667008021846732, "grad_norm": 0.21631172330941187, "learning_rate": 2.0985078401618615e-05, "loss": 0.2968, "num_tokens": 1043509661.0, "step": 5469 }, { "epoch": 1.867042157364738, "grad_norm": 0.18514462180723526, "learning_rate": 2.0978755690440062e-05, "loss": 0.332, "num_tokens": 1043735684.0, "step": 5470 }, { "epoch": 1.8673835125448028, "grad_norm": 0.252824002201199, "learning_rate": 2.097243297926151e-05, "loss": 0.325, "num_tokens": 1043900879.0, "step": 5471 }, { "epoch": 1.8677248677248677, "grad_norm": 0.2151909659425562, "learning_rate": 2.0966110268082956e-05, "loss": 0.302, "num_tokens": 1044070216.0, "step": 5472 }, { "epoch": 1.8680662229049325, "grad_norm": 0.2338416507495138, "learning_rate": 2.09597875569044e-05, "loss": 0.3254, "num_tokens": 1044288842.0, "step": 5473 }, { "epoch": 1.8684075780849976, "grad_norm": 0.19989279347925615, "learning_rate": 2.0953464845725847e-05, "loss": 0.3064, "num_tokens": 1044477277.0, "step": 5474 }, { "epoch": 1.8687489332650622, "grad_norm": 0.24581800995337075, "learning_rate": 2.0947142134547297e-05, "loss": 0.2761, "num_tokens": 1044633793.0, "step": 5475 }, { "epoch": 1.8690902884451273, "grad_norm": 0.20235728994054353, "learning_rate": 2.094081942336874e-05, "loss": 0.2986, "num_tokens": 1044855795.0, "step": 5476 }, { "epoch": 1.8694316436251919, "grad_norm": 0.16852768056350087, "learning_rate": 2.0934496712190188e-05, "loss": 0.313, "num_tokens": 1045077785.0, "step": 5477 }, { "epoch": 1.869772998805257, "grad_norm": 0.23125578197288252, "learning_rate": 2.0928174001011635e-05, "loss": 0.3208, "num_tokens": 1045276177.0, "step": 5478 }, { "epoch": 1.8701143539853218, "grad_norm": 0.22468571499931897, "learning_rate": 2.092185128983308e-05, "loss": 0.317, "num_tokens": 1045469752.0, "step": 5479 }, { "epoch": 1.8704557091653866, "grad_norm": 0.1791879352305478, "learning_rate": 2.091552857865453e-05, "loss": 0.3014, "num_tokens": 1045671352.0, "step": 5480 }, { "epoch": 1.8707970643454515, "grad_norm": 0.23570546100510986, "learning_rate": 2.0909205867475976e-05, "loss": 0.3103, "num_tokens": 1045865097.0, "step": 5481 }, { "epoch": 1.8711384195255163, "grad_norm": 0.20184180669749455, "learning_rate": 2.090288315629742e-05, "loss": 0.2771, "num_tokens": 1046033285.0, "step": 5482 }, { "epoch": 1.8714797747055811, "grad_norm": 0.23820646255336148, "learning_rate": 2.089656044511887e-05, "loss": 0.3209, "num_tokens": 1046237001.0, "step": 5483 }, { "epoch": 1.871821129885646, "grad_norm": 0.18728742630446538, "learning_rate": 2.0890237733940317e-05, "loss": 0.2774, "num_tokens": 1046420160.0, "step": 5484 }, { "epoch": 1.872162485065711, "grad_norm": 0.21349439650562468, "learning_rate": 2.088391502276176e-05, "loss": 0.2856, "num_tokens": 1046594389.0, "step": 5485 }, { "epoch": 1.8725038402457757, "grad_norm": 0.2281775934461771, "learning_rate": 2.0877592311583207e-05, "loss": 0.3099, "num_tokens": 1046780381.0, "step": 5486 }, { "epoch": 1.8728451954258407, "grad_norm": 0.21531078167798887, "learning_rate": 2.0871269600404654e-05, "loss": 0.2812, "num_tokens": 1046970095.0, "step": 5487 }, { "epoch": 1.8731865506059053, "grad_norm": 0.20644255329367256, "learning_rate": 2.08649468892261e-05, "loss": 0.3047, "num_tokens": 1047186884.0, "step": 5488 }, { "epoch": 1.8735279057859704, "grad_norm": 0.2271356818419822, "learning_rate": 2.0858624178047548e-05, "loss": 0.3088, "num_tokens": 1047382157.0, "step": 5489 }, { "epoch": 1.873869260966035, "grad_norm": 0.2392054628621359, "learning_rate": 2.0852301466868995e-05, "loss": 0.3356, "num_tokens": 1047577059.0, "step": 5490 }, { "epoch": 1.8742106161461, "grad_norm": 0.23232920369700832, "learning_rate": 2.084597875569044e-05, "loss": 0.311, "num_tokens": 1047743322.0, "step": 5491 }, { "epoch": 1.874551971326165, "grad_norm": 0.198525089460525, "learning_rate": 2.083965604451189e-05, "loss": 0.3017, "num_tokens": 1047950687.0, "step": 5492 }, { "epoch": 1.8748933265062298, "grad_norm": 0.2159876959140857, "learning_rate": 2.0833333333333336e-05, "loss": 0.2874, "num_tokens": 1048156728.0, "step": 5493 }, { "epoch": 1.8752346816862946, "grad_norm": 0.2290535100352215, "learning_rate": 2.082701062215478e-05, "loss": 0.328, "num_tokens": 1048352696.0, "step": 5494 }, { "epoch": 1.8755760368663594, "grad_norm": 0.21876127288954228, "learning_rate": 2.0820687910976227e-05, "loss": 0.2893, "num_tokens": 1048525956.0, "step": 5495 }, { "epoch": 1.8759173920464243, "grad_norm": 0.2003994142173395, "learning_rate": 2.0814365199797674e-05, "loss": 0.3056, "num_tokens": 1048746279.0, "step": 5496 }, { "epoch": 1.876258747226489, "grad_norm": 0.22104906034421667, "learning_rate": 2.080804248861912e-05, "loss": 0.3053, "num_tokens": 1048965614.0, "step": 5497 }, { "epoch": 1.8766001024065542, "grad_norm": 0.18983549813085449, "learning_rate": 2.0801719777440568e-05, "loss": 0.3197, "num_tokens": 1049181623.0, "step": 5498 }, { "epoch": 1.8769414575866188, "grad_norm": 0.2361100861150334, "learning_rate": 2.0795397066262015e-05, "loss": 0.3363, "num_tokens": 1049371328.0, "step": 5499 }, { "epoch": 1.8772828127666839, "grad_norm": 0.23406170517425706, "learning_rate": 2.078907435508346e-05, "loss": 0.2865, "num_tokens": 1049547223.0, "step": 5500 }, { "epoch": 1.8776241679467485, "grad_norm": 0.19917664204187926, "learning_rate": 2.078275164390491e-05, "loss": 0.2849, "num_tokens": 1049731680.0, "step": 5501 }, { "epoch": 1.8779655231268135, "grad_norm": 0.22235773771927866, "learning_rate": 2.0776428932726355e-05, "loss": 0.274, "num_tokens": 1049891471.0, "step": 5502 }, { "epoch": 1.8783068783068781, "grad_norm": 0.23420264861579063, "learning_rate": 2.07701062215478e-05, "loss": 0.3182, "num_tokens": 1050041862.0, "step": 5503 }, { "epoch": 1.8786482334869432, "grad_norm": 0.2595253394968055, "learning_rate": 2.0763783510369246e-05, "loss": 0.3005, "num_tokens": 1050222295.0, "step": 5504 }, { "epoch": 1.878989588667008, "grad_norm": 0.20467267234690095, "learning_rate": 2.0757460799190696e-05, "loss": 0.3141, "num_tokens": 1050401018.0, "step": 5505 }, { "epoch": 1.8793309438470729, "grad_norm": 0.23843431985737162, "learning_rate": 2.075113808801214e-05, "loss": 0.3118, "num_tokens": 1050552252.0, "step": 5506 }, { "epoch": 1.8796722990271377, "grad_norm": 0.22927186418252532, "learning_rate": 2.0744815376833587e-05, "loss": 0.2811, "num_tokens": 1050729689.0, "step": 5507 }, { "epoch": 1.8800136542072026, "grad_norm": 0.18280498664303718, "learning_rate": 2.0738492665655034e-05, "loss": 0.2951, "num_tokens": 1050931459.0, "step": 5508 }, { "epoch": 1.8803550093872674, "grad_norm": 0.20992360338087993, "learning_rate": 2.073216995447648e-05, "loss": 0.3204, "num_tokens": 1051145459.0, "step": 5509 }, { "epoch": 1.8806963645673322, "grad_norm": 0.21571396805268322, "learning_rate": 2.0725847243297928e-05, "loss": 0.2938, "num_tokens": 1051326085.0, "step": 5510 }, { "epoch": 1.8810377197473973, "grad_norm": 0.23469168273264962, "learning_rate": 2.0719524532119375e-05, "loss": 0.2915, "num_tokens": 1051472369.0, "step": 5511 }, { "epoch": 1.881379074927462, "grad_norm": 0.2579329244761947, "learning_rate": 2.071320182094082e-05, "loss": 0.304, "num_tokens": 1051651197.0, "step": 5512 }, { "epoch": 1.881720430107527, "grad_norm": 0.19401272701890038, "learning_rate": 2.0706879109762266e-05, "loss": 0.2913, "num_tokens": 1051882918.0, "step": 5513 }, { "epoch": 1.8820617852875916, "grad_norm": 0.19525340812085418, "learning_rate": 2.0700556398583716e-05, "loss": 0.2803, "num_tokens": 1052077720.0, "step": 5514 }, { "epoch": 1.8824031404676567, "grad_norm": 0.2165113344572307, "learning_rate": 2.069423368740516e-05, "loss": 0.3037, "num_tokens": 1052258698.0, "step": 5515 }, { "epoch": 1.8827444956477215, "grad_norm": 0.1884834435738288, "learning_rate": 2.0687910976226606e-05, "loss": 0.3207, "num_tokens": 1052456913.0, "step": 5516 }, { "epoch": 1.8830858508277863, "grad_norm": 0.22519604707778143, "learning_rate": 2.0681588265048053e-05, "loss": 0.29, "num_tokens": 1052648402.0, "step": 5517 }, { "epoch": 1.8834272060078512, "grad_norm": 0.20177334278676545, "learning_rate": 2.06752655538695e-05, "loss": 0.3116, "num_tokens": 1052830192.0, "step": 5518 }, { "epoch": 1.883768561187916, "grad_norm": 0.22494472873928664, "learning_rate": 2.0668942842690947e-05, "loss": 0.313, "num_tokens": 1053043454.0, "step": 5519 }, { "epoch": 1.8841099163679809, "grad_norm": 0.22698240529095304, "learning_rate": 2.0662620131512394e-05, "loss": 0.349, "num_tokens": 1053254177.0, "step": 5520 }, { "epoch": 1.8844512715480457, "grad_norm": 0.20289667635991016, "learning_rate": 2.0656297420333838e-05, "loss": 0.2912, "num_tokens": 1053451276.0, "step": 5521 }, { "epoch": 1.8847926267281108, "grad_norm": 0.21815442989397432, "learning_rate": 2.064997470915529e-05, "loss": 0.3298, "num_tokens": 1053660258.0, "step": 5522 }, { "epoch": 1.8851339819081754, "grad_norm": 0.21313036119154646, "learning_rate": 2.0643651997976735e-05, "loss": 0.2812, "num_tokens": 1053857010.0, "step": 5523 }, { "epoch": 1.8854753370882404, "grad_norm": 0.20908693181308705, "learning_rate": 2.063732928679818e-05, "loss": 0.3191, "num_tokens": 1054046842.0, "step": 5524 }, { "epoch": 1.885816692268305, "grad_norm": 0.21323359976898765, "learning_rate": 2.0631006575619626e-05, "loss": 0.3058, "num_tokens": 1054205894.0, "step": 5525 }, { "epoch": 1.8861580474483701, "grad_norm": 0.22882354168172664, "learning_rate": 2.0624683864441073e-05, "loss": 0.2914, "num_tokens": 1054424380.0, "step": 5526 }, { "epoch": 1.8864994026284347, "grad_norm": 0.21305063574016586, "learning_rate": 2.061836115326252e-05, "loss": 0.2961, "num_tokens": 1054611215.0, "step": 5527 }, { "epoch": 1.8868407578084998, "grad_norm": 0.229393311394616, "learning_rate": 2.0612038442083967e-05, "loss": 0.2968, "num_tokens": 1054789529.0, "step": 5528 }, { "epoch": 1.8871821129885646, "grad_norm": 0.22342859322948994, "learning_rate": 2.0605715730905414e-05, "loss": 0.3092, "num_tokens": 1054999293.0, "step": 5529 }, { "epoch": 1.8875234681686295, "grad_norm": 0.21950207308544717, "learning_rate": 2.059939301972686e-05, "loss": 0.3086, "num_tokens": 1055186271.0, "step": 5530 }, { "epoch": 1.8878648233486943, "grad_norm": 0.20579354717593387, "learning_rate": 2.0593070308548308e-05, "loss": 0.2675, "num_tokens": 1055364460.0, "step": 5531 }, { "epoch": 1.8882061785287592, "grad_norm": 0.21400803085738634, "learning_rate": 2.0586747597369755e-05, "loss": 0.3176, "num_tokens": 1055564192.0, "step": 5532 }, { "epoch": 1.888547533708824, "grad_norm": 0.21929622322821996, "learning_rate": 2.05804248861912e-05, "loss": 0.2752, "num_tokens": 1055739224.0, "step": 5533 }, { "epoch": 1.8888888888888888, "grad_norm": 0.227673240661319, "learning_rate": 2.0574102175012645e-05, "loss": 0.3122, "num_tokens": 1055913347.0, "step": 5534 }, { "epoch": 1.889230244068954, "grad_norm": 0.20618106088651825, "learning_rate": 2.0567779463834096e-05, "loss": 0.2829, "num_tokens": 1056104071.0, "step": 5535 }, { "epoch": 1.8895715992490185, "grad_norm": 0.22129858874089936, "learning_rate": 2.056145675265554e-05, "loss": 0.2762, "num_tokens": 1056285155.0, "step": 5536 }, { "epoch": 1.8899129544290836, "grad_norm": 0.2331194946630033, "learning_rate": 2.0555134041476986e-05, "loss": 0.3229, "num_tokens": 1056434617.0, "step": 5537 }, { "epoch": 1.8902543096091482, "grad_norm": 0.20885242380114652, "learning_rate": 2.0548811330298433e-05, "loss": 0.3016, "num_tokens": 1056619677.0, "step": 5538 }, { "epoch": 1.8905956647892133, "grad_norm": 0.21887158234093604, "learning_rate": 2.054248861911988e-05, "loss": 0.2757, "num_tokens": 1056773817.0, "step": 5539 }, { "epoch": 1.8909370199692779, "grad_norm": 0.1975011771817886, "learning_rate": 2.0536165907941327e-05, "loss": 0.2962, "num_tokens": 1056974052.0, "step": 5540 }, { "epoch": 1.891278375149343, "grad_norm": 0.24013001845563597, "learning_rate": 2.0529843196762774e-05, "loss": 0.3057, "num_tokens": 1057197928.0, "step": 5541 }, { "epoch": 1.8916197303294078, "grad_norm": 0.182939772454417, "learning_rate": 2.0523520485584218e-05, "loss": 0.3091, "num_tokens": 1057398847.0, "step": 5542 }, { "epoch": 1.8919610855094726, "grad_norm": 0.2275700707864671, "learning_rate": 2.0517197774405665e-05, "loss": 0.3019, "num_tokens": 1057611844.0, "step": 5543 }, { "epoch": 1.8923024406895375, "grad_norm": 0.2087789121234919, "learning_rate": 2.0510875063227115e-05, "loss": 0.2996, "num_tokens": 1057812819.0, "step": 5544 }, { "epoch": 1.8926437958696023, "grad_norm": 0.2134143263729858, "learning_rate": 2.050455235204856e-05, "loss": 0.3098, "num_tokens": 1058028269.0, "step": 5545 }, { "epoch": 1.8929851510496671, "grad_norm": 0.19300257068784973, "learning_rate": 2.0498229640870006e-05, "loss": 0.2851, "num_tokens": 1058173340.0, "step": 5546 }, { "epoch": 1.893326506229732, "grad_norm": 0.24818795744621297, "learning_rate": 2.0491906929691453e-05, "loss": 0.3156, "num_tokens": 1058351811.0, "step": 5547 }, { "epoch": 1.893667861409797, "grad_norm": 0.2131652274540424, "learning_rate": 2.04855842185129e-05, "loss": 0.3026, "num_tokens": 1058546364.0, "step": 5548 }, { "epoch": 1.8940092165898617, "grad_norm": 0.21147564083753526, "learning_rate": 2.0479261507334347e-05, "loss": 0.2949, "num_tokens": 1058732725.0, "step": 5549 }, { "epoch": 1.8943505717699267, "grad_norm": 0.22245077211475173, "learning_rate": 2.0472938796155794e-05, "loss": 0.3076, "num_tokens": 1058956690.0, "step": 5550 }, { "epoch": 1.8946919269499913, "grad_norm": 0.18203231655779645, "learning_rate": 2.0466616084977237e-05, "loss": 0.3037, "num_tokens": 1059159119.0, "step": 5551 }, { "epoch": 1.8950332821300564, "grad_norm": 0.1894409935401175, "learning_rate": 2.0460293373798688e-05, "loss": 0.3183, "num_tokens": 1059348583.0, "step": 5552 }, { "epoch": 1.8953746373101212, "grad_norm": 0.2273338227749071, "learning_rate": 2.0453970662620135e-05, "loss": 0.2769, "num_tokens": 1059563527.0, "step": 5553 }, { "epoch": 1.895715992490186, "grad_norm": 0.19504981589037051, "learning_rate": 2.0447647951441578e-05, "loss": 0.2984, "num_tokens": 1059771036.0, "step": 5554 }, { "epoch": 1.896057347670251, "grad_norm": 0.2052972422649129, "learning_rate": 2.0441325240263025e-05, "loss": 0.2888, "num_tokens": 1059971463.0, "step": 5555 }, { "epoch": 1.8963987028503158, "grad_norm": 0.20348851129212212, "learning_rate": 2.0435002529084472e-05, "loss": 0.29, "num_tokens": 1060168476.0, "step": 5556 }, { "epoch": 1.8967400580303806, "grad_norm": 0.20368186010550993, "learning_rate": 2.042867981790592e-05, "loss": 0.2801, "num_tokens": 1060342248.0, "step": 5557 }, { "epoch": 1.8970814132104454, "grad_norm": 0.25764694445630065, "learning_rate": 2.0422357106727366e-05, "loss": 0.2906, "num_tokens": 1060487606.0, "step": 5558 }, { "epoch": 1.8974227683905103, "grad_norm": 0.23312457922990668, "learning_rate": 2.041603439554881e-05, "loss": 0.3053, "num_tokens": 1060633914.0, "step": 5559 }, { "epoch": 1.8977641235705751, "grad_norm": 0.21973353691701428, "learning_rate": 2.040971168437026e-05, "loss": 0.2875, "num_tokens": 1060789432.0, "step": 5560 }, { "epoch": 1.8981054787506402, "grad_norm": 0.22421976301142285, "learning_rate": 2.0403388973191707e-05, "loss": 0.2863, "num_tokens": 1060970109.0, "step": 5561 }, { "epoch": 1.8984468339307048, "grad_norm": 0.2723736280354391, "learning_rate": 2.039706626201315e-05, "loss": 0.3051, "num_tokens": 1061146841.0, "step": 5562 }, { "epoch": 1.8987881891107699, "grad_norm": 0.21867316024452005, "learning_rate": 2.0390743550834598e-05, "loss": 0.3185, "num_tokens": 1061370659.0, "step": 5563 }, { "epoch": 1.8991295442908345, "grad_norm": 0.19930223520508908, "learning_rate": 2.0384420839656045e-05, "loss": 0.3004, "num_tokens": 1061582307.0, "step": 5564 }, { "epoch": 1.8994708994708995, "grad_norm": 0.21275298192744338, "learning_rate": 2.0378098128477492e-05, "loss": 0.3196, "num_tokens": 1061750247.0, "step": 5565 }, { "epoch": 1.8998122546509644, "grad_norm": 0.2077884753684608, "learning_rate": 2.037177541729894e-05, "loss": 0.3129, "num_tokens": 1061980327.0, "step": 5566 }, { "epoch": 1.9001536098310292, "grad_norm": 0.21800004873280535, "learning_rate": 2.0365452706120386e-05, "loss": 0.2824, "num_tokens": 1062162289.0, "step": 5567 }, { "epoch": 1.900494965011094, "grad_norm": 0.22847762075380632, "learning_rate": 2.035912999494183e-05, "loss": 0.3093, "num_tokens": 1062343884.0, "step": 5568 }, { "epoch": 1.900836320191159, "grad_norm": 0.20248197262726533, "learning_rate": 2.035280728376328e-05, "loss": 0.3135, "num_tokens": 1062558594.0, "step": 5569 }, { "epoch": 1.9011776753712237, "grad_norm": 0.2245012639131152, "learning_rate": 2.0346484572584727e-05, "loss": 0.3315, "num_tokens": 1062759305.0, "step": 5570 }, { "epoch": 1.9015190305512886, "grad_norm": 0.23137915495498115, "learning_rate": 2.034016186140617e-05, "loss": 0.2895, "num_tokens": 1062924873.0, "step": 5571 }, { "epoch": 1.9018603857313536, "grad_norm": 0.21846790299392346, "learning_rate": 2.0333839150227617e-05, "loss": 0.325, "num_tokens": 1063153355.0, "step": 5572 }, { "epoch": 1.9022017409114182, "grad_norm": 0.18914386370549127, "learning_rate": 2.0327516439049064e-05, "loss": 0.3001, "num_tokens": 1063365892.0, "step": 5573 }, { "epoch": 1.9025430960914833, "grad_norm": 0.2250164958206104, "learning_rate": 2.032119372787051e-05, "loss": 0.2943, "num_tokens": 1063571827.0, "step": 5574 }, { "epoch": 1.902884451271548, "grad_norm": 0.2252689269512496, "learning_rate": 2.0314871016691958e-05, "loss": 0.3022, "num_tokens": 1063762466.0, "step": 5575 }, { "epoch": 1.903225806451613, "grad_norm": 0.21447941143280294, "learning_rate": 2.0308548305513405e-05, "loss": 0.2787, "num_tokens": 1063970200.0, "step": 5576 }, { "epoch": 1.9035671616316776, "grad_norm": 0.21013856595645625, "learning_rate": 2.0302225594334852e-05, "loss": 0.3361, "num_tokens": 1064159732.0, "step": 5577 }, { "epoch": 1.9039085168117427, "grad_norm": 0.22274278029486919, "learning_rate": 2.02959028831563e-05, "loss": 0.3111, "num_tokens": 1064326891.0, "step": 5578 }, { "epoch": 1.9042498719918075, "grad_norm": 0.22126646032277292, "learning_rate": 2.0289580171977746e-05, "loss": 0.2889, "num_tokens": 1064521901.0, "step": 5579 }, { "epoch": 1.9045912271718723, "grad_norm": 0.2223700457327552, "learning_rate": 2.028325746079919e-05, "loss": 0.3395, "num_tokens": 1064741933.0, "step": 5580 }, { "epoch": 1.9049325823519372, "grad_norm": 0.22901251179505075, "learning_rate": 2.0276934749620637e-05, "loss": 0.2977, "num_tokens": 1064912999.0, "step": 5581 }, { "epoch": 1.905273937532002, "grad_norm": 0.2064566376661045, "learning_rate": 2.0270612038442087e-05, "loss": 0.2756, "num_tokens": 1065094228.0, "step": 5582 }, { "epoch": 1.9056152927120669, "grad_norm": 0.2190905698184527, "learning_rate": 2.026428932726353e-05, "loss": 0.2991, "num_tokens": 1065263537.0, "step": 5583 }, { "epoch": 1.9059566478921317, "grad_norm": 0.21419209037551584, "learning_rate": 2.0257966616084978e-05, "loss": 0.2952, "num_tokens": 1065458578.0, "step": 5584 }, { "epoch": 1.9062980030721968, "grad_norm": 0.21139566852299554, "learning_rate": 2.0251643904906425e-05, "loss": 0.2967, "num_tokens": 1065665526.0, "step": 5585 }, { "epoch": 1.9066393582522614, "grad_norm": 0.19453184672695453, "learning_rate": 2.024532119372787e-05, "loss": 0.3276, "num_tokens": 1065893019.0, "step": 5586 }, { "epoch": 1.9069807134323264, "grad_norm": 0.22647969255642367, "learning_rate": 2.023899848254932e-05, "loss": 0.3134, "num_tokens": 1066091625.0, "step": 5587 }, { "epoch": 1.907322068612391, "grad_norm": 0.21728518560730087, "learning_rate": 2.0232675771370766e-05, "loss": 0.3201, "num_tokens": 1066306938.0, "step": 5588 }, { "epoch": 1.9076634237924561, "grad_norm": 0.22886400960609396, "learning_rate": 2.022635306019221e-05, "loss": 0.2936, "num_tokens": 1066495095.0, "step": 5589 }, { "epoch": 1.9080047789725207, "grad_norm": 0.2213007319020692, "learning_rate": 2.0220030349013656e-05, "loss": 0.2824, "num_tokens": 1066660758.0, "step": 5590 }, { "epoch": 1.9083461341525858, "grad_norm": 0.2240315535122396, "learning_rate": 2.0213707637835106e-05, "loss": 0.299, "num_tokens": 1066867768.0, "step": 5591 }, { "epoch": 1.9086874893326506, "grad_norm": 0.22088599039029413, "learning_rate": 2.020738492665655e-05, "loss": 0.2953, "num_tokens": 1067038427.0, "step": 5592 }, { "epoch": 1.9090288445127155, "grad_norm": 0.20950567212144627, "learning_rate": 2.0201062215477997e-05, "loss": 0.2862, "num_tokens": 1067229169.0, "step": 5593 }, { "epoch": 1.9093701996927803, "grad_norm": 0.20431649048278164, "learning_rate": 2.0194739504299444e-05, "loss": 0.2966, "num_tokens": 1067425291.0, "step": 5594 }, { "epoch": 1.9097115548728452, "grad_norm": 0.22034462297149696, "learning_rate": 2.018841679312089e-05, "loss": 0.3219, "num_tokens": 1067633554.0, "step": 5595 }, { "epoch": 1.91005291005291, "grad_norm": 0.24037817251068766, "learning_rate": 2.0182094081942338e-05, "loss": 0.2819, "num_tokens": 1067792840.0, "step": 5596 }, { "epoch": 1.9103942652329748, "grad_norm": 0.21786235675887805, "learning_rate": 2.0175771370763785e-05, "loss": 0.3157, "num_tokens": 1068012733.0, "step": 5597 }, { "epoch": 1.91073562041304, "grad_norm": 0.21839540931173973, "learning_rate": 2.016944865958523e-05, "loss": 0.2879, "num_tokens": 1068194257.0, "step": 5598 }, { "epoch": 1.9110769755931045, "grad_norm": 0.21900797631218943, "learning_rate": 2.016312594840668e-05, "loss": 0.2802, "num_tokens": 1068402477.0, "step": 5599 }, { "epoch": 1.9114183307731696, "grad_norm": 0.20917532376644385, "learning_rate": 2.0156803237228126e-05, "loss": 0.3105, "num_tokens": 1068571265.0, "step": 5600 }, { "epoch": 1.9117596859532342, "grad_norm": 0.23170830539168122, "learning_rate": 2.015048052604957e-05, "loss": 0.3163, "num_tokens": 1068783391.0, "step": 5601 }, { "epoch": 1.9121010411332993, "grad_norm": 0.21756580071338905, "learning_rate": 2.0144157814871017e-05, "loss": 0.2998, "num_tokens": 1068998848.0, "step": 5602 }, { "epoch": 1.912442396313364, "grad_norm": 0.1941647849246422, "learning_rate": 2.0137835103692464e-05, "loss": 0.3035, "num_tokens": 1069198477.0, "step": 5603 }, { "epoch": 1.912783751493429, "grad_norm": 0.21264592469816915, "learning_rate": 2.013151239251391e-05, "loss": 0.3015, "num_tokens": 1069411410.0, "step": 5604 }, { "epoch": 1.9131251066734938, "grad_norm": 0.2270178504235975, "learning_rate": 2.0125189681335357e-05, "loss": 0.2788, "num_tokens": 1069562805.0, "step": 5605 }, { "epoch": 1.9134664618535586, "grad_norm": 0.2730544475658694, "learning_rate": 2.0118866970156804e-05, "loss": 0.2743, "num_tokens": 1069712994.0, "step": 5606 }, { "epoch": 1.9138078170336235, "grad_norm": 0.19941969488219038, "learning_rate": 2.011254425897825e-05, "loss": 0.2894, "num_tokens": 1069933397.0, "step": 5607 }, { "epoch": 1.9141491722136883, "grad_norm": 0.19960161806956836, "learning_rate": 2.01062215477997e-05, "loss": 0.312, "num_tokens": 1070105754.0, "step": 5608 }, { "epoch": 1.9144905273937534, "grad_norm": 0.24157318250291934, "learning_rate": 2.0099898836621145e-05, "loss": 0.288, "num_tokens": 1070311790.0, "step": 5609 }, { "epoch": 1.914831882573818, "grad_norm": 0.2199096733796592, "learning_rate": 2.009357612544259e-05, "loss": 0.3045, "num_tokens": 1070515393.0, "step": 5610 }, { "epoch": 1.915173237753883, "grad_norm": 0.19462816164431826, "learning_rate": 2.0087253414264036e-05, "loss": 0.3005, "num_tokens": 1070721441.0, "step": 5611 }, { "epoch": 1.9155145929339477, "grad_norm": 0.2155196984176989, "learning_rate": 2.0080930703085486e-05, "loss": 0.3302, "num_tokens": 1070928788.0, "step": 5612 }, { "epoch": 1.9158559481140127, "grad_norm": 0.19778015869962895, "learning_rate": 2.007460799190693e-05, "loss": 0.2894, "num_tokens": 1071110864.0, "step": 5613 }, { "epoch": 1.9161973032940773, "grad_norm": 0.22296523113850567, "learning_rate": 2.0068285280728377e-05, "loss": 0.3104, "num_tokens": 1071284570.0, "step": 5614 }, { "epoch": 1.9165386584741424, "grad_norm": 0.23243996445569404, "learning_rate": 2.0061962569549824e-05, "loss": 0.3139, "num_tokens": 1071463877.0, "step": 5615 }, { "epoch": 1.9168800136542072, "grad_norm": 0.21680251879550166, "learning_rate": 2.005563985837127e-05, "loss": 0.3073, "num_tokens": 1071640349.0, "step": 5616 }, { "epoch": 1.917221368834272, "grad_norm": 0.24602432189632797, "learning_rate": 2.0049317147192718e-05, "loss": 0.2994, "num_tokens": 1071848808.0, "step": 5617 }, { "epoch": 1.917562724014337, "grad_norm": 0.20374507176068174, "learning_rate": 2.0042994436014165e-05, "loss": 0.286, "num_tokens": 1072006655.0, "step": 5618 }, { "epoch": 1.9179040791944018, "grad_norm": 0.22903055045335435, "learning_rate": 2.003667172483561e-05, "loss": 0.3587, "num_tokens": 1072229083.0, "step": 5619 }, { "epoch": 1.9182454343744666, "grad_norm": 0.18248494951557853, "learning_rate": 2.0030349013657055e-05, "loss": 0.2825, "num_tokens": 1072422548.0, "step": 5620 }, { "epoch": 1.9185867895545314, "grad_norm": 0.24147396879831867, "learning_rate": 2.0024026302478506e-05, "loss": 0.3313, "num_tokens": 1072612738.0, "step": 5621 }, { "epoch": 1.9189281447345965, "grad_norm": 0.24100770260910226, "learning_rate": 2.001770359129995e-05, "loss": 0.2809, "num_tokens": 1072791646.0, "step": 5622 }, { "epoch": 1.9192694999146611, "grad_norm": 0.19640763142543058, "learning_rate": 2.0011380880121396e-05, "loss": 0.3486, "num_tokens": 1073015340.0, "step": 5623 }, { "epoch": 1.9196108550947262, "grad_norm": 0.22800755470725256, "learning_rate": 2.0005058168942843e-05, "loss": 0.3177, "num_tokens": 1073173888.0, "step": 5624 }, { "epoch": 1.9199522102747908, "grad_norm": 0.23318919619392683, "learning_rate": 1.999873545776429e-05, "loss": 0.3135, "num_tokens": 1073361944.0, "step": 5625 }, { "epoch": 1.9202935654548559, "grad_norm": 0.24088249911449633, "learning_rate": 1.9992412746585737e-05, "loss": 0.3064, "num_tokens": 1073586507.0, "step": 5626 }, { "epoch": 1.9206349206349205, "grad_norm": 0.2056957118514012, "learning_rate": 1.9986090035407184e-05, "loss": 0.2725, "num_tokens": 1073777992.0, "step": 5627 }, { "epoch": 1.9209762758149855, "grad_norm": 0.19883352365376414, "learning_rate": 1.9979767324228628e-05, "loss": 0.3249, "num_tokens": 1073981331.0, "step": 5628 }, { "epoch": 1.9213176309950504, "grad_norm": 0.3374820773586219, "learning_rate": 1.9973444613050078e-05, "loss": 0.3122, "num_tokens": 1074146128.0, "step": 5629 }, { "epoch": 1.9216589861751152, "grad_norm": 0.21616262549161655, "learning_rate": 1.9967121901871525e-05, "loss": 0.2925, "num_tokens": 1074313816.0, "step": 5630 }, { "epoch": 1.92200034135518, "grad_norm": 0.2564495952784218, "learning_rate": 1.996079919069297e-05, "loss": 0.3272, "num_tokens": 1074507148.0, "step": 5631 }, { "epoch": 1.922341696535245, "grad_norm": 0.20246437810010967, "learning_rate": 1.9954476479514416e-05, "loss": 0.287, "num_tokens": 1074691496.0, "step": 5632 }, { "epoch": 1.9226830517153097, "grad_norm": 0.21056504018184638, "learning_rate": 1.9948153768335863e-05, "loss": 0.283, "num_tokens": 1074884111.0, "step": 5633 }, { "epoch": 1.9230244068953746, "grad_norm": 0.2038646388039311, "learning_rate": 1.994183105715731e-05, "loss": 0.2815, "num_tokens": 1075049203.0, "step": 5634 }, { "epoch": 1.9233657620754396, "grad_norm": 0.20981377640773144, "learning_rate": 1.9935508345978757e-05, "loss": 0.3032, "num_tokens": 1075267022.0, "step": 5635 }, { "epoch": 1.9237071172555043, "grad_norm": 0.2112600989935164, "learning_rate": 1.9929185634800204e-05, "loss": 0.3132, "num_tokens": 1075442035.0, "step": 5636 }, { "epoch": 1.9240484724355693, "grad_norm": 0.2146253156669467, "learning_rate": 1.992286292362165e-05, "loss": 0.3061, "num_tokens": 1075656179.0, "step": 5637 }, { "epoch": 1.924389827615634, "grad_norm": 0.21833269846408587, "learning_rate": 1.9916540212443098e-05, "loss": 0.2883, "num_tokens": 1075845593.0, "step": 5638 }, { "epoch": 1.924731182795699, "grad_norm": 0.2141540065726319, "learning_rate": 1.9910217501264545e-05, "loss": 0.3138, "num_tokens": 1076062593.0, "step": 5639 }, { "epoch": 1.9250725379757638, "grad_norm": 0.21303119282833122, "learning_rate": 1.990389479008599e-05, "loss": 0.2834, "num_tokens": 1076239180.0, "step": 5640 }, { "epoch": 1.9254138931558287, "grad_norm": 0.291917652862881, "learning_rate": 1.9897572078907435e-05, "loss": 0.2918, "num_tokens": 1076434419.0, "step": 5641 }, { "epoch": 1.9257552483358935, "grad_norm": 0.2076265413623802, "learning_rate": 1.9891249367728886e-05, "loss": 0.2987, "num_tokens": 1076614698.0, "step": 5642 }, { "epoch": 1.9260966035159583, "grad_norm": 0.27361761137104185, "learning_rate": 1.988492665655033e-05, "loss": 0.3047, "num_tokens": 1076809929.0, "step": 5643 }, { "epoch": 1.9264379586960232, "grad_norm": 0.2085959432520553, "learning_rate": 1.9878603945371776e-05, "loss": 0.3269, "num_tokens": 1077016816.0, "step": 5644 }, { "epoch": 1.926779313876088, "grad_norm": 0.22784542837527885, "learning_rate": 1.9872281234193223e-05, "loss": 0.2924, "num_tokens": 1077170227.0, "step": 5645 }, { "epoch": 1.927120669056153, "grad_norm": 0.22658471127616858, "learning_rate": 1.986595852301467e-05, "loss": 0.2782, "num_tokens": 1077359466.0, "step": 5646 }, { "epoch": 1.9274620242362177, "grad_norm": 0.19077493116862687, "learning_rate": 1.9859635811836117e-05, "loss": 0.3021, "num_tokens": 1077567076.0, "step": 5647 }, { "epoch": 1.9278033794162828, "grad_norm": 0.18884825531233027, "learning_rate": 1.9853313100657564e-05, "loss": 0.2729, "num_tokens": 1077721564.0, "step": 5648 }, { "epoch": 1.9281447345963474, "grad_norm": 0.2236759295255914, "learning_rate": 1.9846990389479008e-05, "loss": 0.31, "num_tokens": 1077920277.0, "step": 5649 }, { "epoch": 1.9284860897764124, "grad_norm": 0.2040695767608169, "learning_rate": 1.9840667678300455e-05, "loss": 0.3003, "num_tokens": 1078155143.0, "step": 5650 }, { "epoch": 1.928827444956477, "grad_norm": 0.2047203858347617, "learning_rate": 1.9834344967121905e-05, "loss": 0.2848, "num_tokens": 1078330391.0, "step": 5651 }, { "epoch": 1.9291688001365421, "grad_norm": 0.23479484426512107, "learning_rate": 1.982802225594335e-05, "loss": 0.2916, "num_tokens": 1078503605.0, "step": 5652 }, { "epoch": 1.929510155316607, "grad_norm": 0.19994948949689306, "learning_rate": 1.9821699544764796e-05, "loss": 0.2758, "num_tokens": 1078682172.0, "step": 5653 }, { "epoch": 1.9298515104966718, "grad_norm": 0.18815480036862192, "learning_rate": 1.9815376833586243e-05, "loss": 0.2914, "num_tokens": 1078861444.0, "step": 5654 }, { "epoch": 1.9301928656767366, "grad_norm": 0.21025356349388866, "learning_rate": 1.980905412240769e-05, "loss": 0.3073, "num_tokens": 1079047062.0, "step": 5655 }, { "epoch": 1.9305342208568015, "grad_norm": 0.21514492793935674, "learning_rate": 1.9802731411229137e-05, "loss": 0.3069, "num_tokens": 1079247466.0, "step": 5656 }, { "epoch": 1.9308755760368663, "grad_norm": 0.21277146862550333, "learning_rate": 1.9796408700050584e-05, "loss": 0.2986, "num_tokens": 1079406259.0, "step": 5657 }, { "epoch": 1.9312169312169312, "grad_norm": 0.21428123827701093, "learning_rate": 1.9790085988872027e-05, "loss": 0.3288, "num_tokens": 1079613008.0, "step": 5658 }, { "epoch": 1.9315582863969962, "grad_norm": 0.23094818001435724, "learning_rate": 1.9783763277693478e-05, "loss": 0.3086, "num_tokens": 1079789629.0, "step": 5659 }, { "epoch": 1.9318996415770608, "grad_norm": 0.2381496023380934, "learning_rate": 1.9777440566514925e-05, "loss": 0.3051, "num_tokens": 1079971296.0, "step": 5660 }, { "epoch": 1.932240996757126, "grad_norm": 0.2263087645018506, "learning_rate": 1.9771117855336368e-05, "loss": 0.3174, "num_tokens": 1080171493.0, "step": 5661 }, { "epoch": 1.9325823519371905, "grad_norm": 0.20420759212446202, "learning_rate": 1.9764795144157815e-05, "loss": 0.3192, "num_tokens": 1080386820.0, "step": 5662 }, { "epoch": 1.9329237071172556, "grad_norm": 0.21716471919851385, "learning_rate": 1.9758472432979262e-05, "loss": 0.3162, "num_tokens": 1080566934.0, "step": 5663 }, { "epoch": 1.9332650622973202, "grad_norm": 0.24290490680706572, "learning_rate": 1.975214972180071e-05, "loss": 0.3052, "num_tokens": 1080732650.0, "step": 5664 }, { "epoch": 1.9336064174773853, "grad_norm": 0.20763239778964898, "learning_rate": 1.9745827010622156e-05, "loss": 0.2818, "num_tokens": 1080901438.0, "step": 5665 }, { "epoch": 1.93394777265745, "grad_norm": 0.23798347631030983, "learning_rate": 1.9739504299443603e-05, "loss": 0.301, "num_tokens": 1081064712.0, "step": 5666 }, { "epoch": 1.934289127837515, "grad_norm": 0.2244013778767832, "learning_rate": 1.9733181588265047e-05, "loss": 0.3075, "num_tokens": 1081278164.0, "step": 5667 }, { "epoch": 1.9346304830175798, "grad_norm": 0.2182097682525624, "learning_rate": 1.9726858877086497e-05, "loss": 0.3093, "num_tokens": 1081473186.0, "step": 5668 }, { "epoch": 1.9349718381976446, "grad_norm": 0.23821663844419688, "learning_rate": 1.9720536165907944e-05, "loss": 0.3044, "num_tokens": 1081671243.0, "step": 5669 }, { "epoch": 1.9353131933777095, "grad_norm": 0.21156593939026808, "learning_rate": 1.9714213454729388e-05, "loss": 0.3246, "num_tokens": 1081864624.0, "step": 5670 }, { "epoch": 1.9356545485577743, "grad_norm": 0.21715602439374862, "learning_rate": 1.9707890743550835e-05, "loss": 0.3017, "num_tokens": 1082035728.0, "step": 5671 }, { "epoch": 1.9359959037378394, "grad_norm": 0.24414488799723144, "learning_rate": 1.9701568032372285e-05, "loss": 0.3163, "num_tokens": 1082217228.0, "step": 5672 }, { "epoch": 1.936337258917904, "grad_norm": 0.2094239553988862, "learning_rate": 1.969524532119373e-05, "loss": 0.2904, "num_tokens": 1082397257.0, "step": 5673 }, { "epoch": 1.936678614097969, "grad_norm": 0.2061992100323064, "learning_rate": 1.9688922610015176e-05, "loss": 0.2866, "num_tokens": 1082594058.0, "step": 5674 }, { "epoch": 1.9370199692780337, "grad_norm": 0.2345638487028726, "learning_rate": 1.9682599898836623e-05, "loss": 0.3243, "num_tokens": 1082766184.0, "step": 5675 }, { "epoch": 1.9373613244580987, "grad_norm": 0.23019643492575287, "learning_rate": 1.967627718765807e-05, "loss": 0.2939, "num_tokens": 1082938869.0, "step": 5676 }, { "epoch": 1.9377026796381636, "grad_norm": 0.2278027514399995, "learning_rate": 1.9669954476479517e-05, "loss": 0.3111, "num_tokens": 1083142695.0, "step": 5677 }, { "epoch": 1.9380440348182284, "grad_norm": 0.21840625986313184, "learning_rate": 1.9663631765300964e-05, "loss": 0.3059, "num_tokens": 1083317245.0, "step": 5678 }, { "epoch": 1.9383853899982932, "grad_norm": 0.24142576179035438, "learning_rate": 1.9657309054122407e-05, "loss": 0.3175, "num_tokens": 1083524392.0, "step": 5679 }, { "epoch": 1.938726745178358, "grad_norm": 0.1894713547659185, "learning_rate": 1.9650986342943854e-05, "loss": 0.3042, "num_tokens": 1083717668.0, "step": 5680 }, { "epoch": 1.939068100358423, "grad_norm": 0.23139304723683518, "learning_rate": 1.9644663631765304e-05, "loss": 0.3078, "num_tokens": 1083898517.0, "step": 5681 }, { "epoch": 1.9394094555384878, "grad_norm": 0.21935897546876812, "learning_rate": 1.9638340920586748e-05, "loss": 0.3124, "num_tokens": 1084089425.0, "step": 5682 }, { "epoch": 1.9397508107185528, "grad_norm": 0.21268675570225734, "learning_rate": 1.9632018209408195e-05, "loss": 0.2798, "num_tokens": 1084264733.0, "step": 5683 }, { "epoch": 1.9400921658986174, "grad_norm": 0.2189693894794, "learning_rate": 1.9625695498229642e-05, "loss": 0.3064, "num_tokens": 1084459623.0, "step": 5684 }, { "epoch": 1.9404335210786825, "grad_norm": 0.22603906060798384, "learning_rate": 1.961937278705109e-05, "loss": 0.2861, "num_tokens": 1084680187.0, "step": 5685 }, { "epoch": 1.9407748762587471, "grad_norm": 0.22575386429590893, "learning_rate": 1.9613050075872536e-05, "loss": 0.2922, "num_tokens": 1084856233.0, "step": 5686 }, { "epoch": 1.9411162314388122, "grad_norm": 0.21065461692445278, "learning_rate": 1.9606727364693983e-05, "loss": 0.2909, "num_tokens": 1085032437.0, "step": 5687 }, { "epoch": 1.9414575866188768, "grad_norm": 0.21552529924229424, "learning_rate": 1.9600404653515427e-05, "loss": 0.3017, "num_tokens": 1085236988.0, "step": 5688 }, { "epoch": 1.9417989417989419, "grad_norm": 0.21784086996631896, "learning_rate": 1.9594081942336877e-05, "loss": 0.3305, "num_tokens": 1085488771.0, "step": 5689 }, { "epoch": 1.9421402969790067, "grad_norm": 0.21054551514990213, "learning_rate": 1.9587759231158324e-05, "loss": 0.311, "num_tokens": 1085701305.0, "step": 5690 }, { "epoch": 1.9424816521590715, "grad_norm": 0.2131164808033922, "learning_rate": 1.9581436519979768e-05, "loss": 0.3137, "num_tokens": 1085923407.0, "step": 5691 }, { "epoch": 1.9428230073391364, "grad_norm": 0.1957291960636264, "learning_rate": 1.9575113808801215e-05, "loss": 0.3149, "num_tokens": 1086123391.0, "step": 5692 }, { "epoch": 1.9431643625192012, "grad_norm": 0.20912992979119227, "learning_rate": 1.956879109762266e-05, "loss": 0.297, "num_tokens": 1086342023.0, "step": 5693 }, { "epoch": 1.943505717699266, "grad_norm": 0.20646590620497535, "learning_rate": 1.956246838644411e-05, "loss": 0.295, "num_tokens": 1086516317.0, "step": 5694 }, { "epoch": 1.943847072879331, "grad_norm": 0.22833509006001884, "learning_rate": 1.9556145675265555e-05, "loss": 0.3068, "num_tokens": 1086703785.0, "step": 5695 }, { "epoch": 1.944188428059396, "grad_norm": 0.22029422376512423, "learning_rate": 1.9549822964087e-05, "loss": 0.2983, "num_tokens": 1086916671.0, "step": 5696 }, { "epoch": 1.9445297832394606, "grad_norm": 0.19958870165868173, "learning_rate": 1.9543500252908446e-05, "loss": 0.314, "num_tokens": 1087088741.0, "step": 5697 }, { "epoch": 1.9448711384195256, "grad_norm": 0.25036782016438613, "learning_rate": 1.9537177541729896e-05, "loss": 0.2877, "num_tokens": 1087259407.0, "step": 5698 }, { "epoch": 1.9452124935995903, "grad_norm": 0.2227255416719432, "learning_rate": 1.953085483055134e-05, "loss": 0.3174, "num_tokens": 1087445559.0, "step": 5699 }, { "epoch": 1.9455538487796553, "grad_norm": 0.21201260326831053, "learning_rate": 1.9524532119372787e-05, "loss": 0.2871, "num_tokens": 1087641752.0, "step": 5700 }, { "epoch": 1.94589520395972, "grad_norm": 0.19726203937302475, "learning_rate": 1.9518209408194234e-05, "loss": 0.2818, "num_tokens": 1087817784.0, "step": 5701 }, { "epoch": 1.946236559139785, "grad_norm": 0.22376694236423084, "learning_rate": 1.951188669701568e-05, "loss": 0.3086, "num_tokens": 1088017612.0, "step": 5702 }, { "epoch": 1.9465779143198498, "grad_norm": 0.1981058140414904, "learning_rate": 1.9505563985837128e-05, "loss": 0.2705, "num_tokens": 1088191523.0, "step": 5703 }, { "epoch": 1.9469192694999147, "grad_norm": 0.21455882162209428, "learning_rate": 1.9499241274658575e-05, "loss": 0.2927, "num_tokens": 1088374981.0, "step": 5704 }, { "epoch": 1.9472606246799795, "grad_norm": 0.21879507917011395, "learning_rate": 1.949291856348002e-05, "loss": 0.3262, "num_tokens": 1088541748.0, "step": 5705 }, { "epoch": 1.9476019798600444, "grad_norm": 0.23421748234622522, "learning_rate": 1.948659585230147e-05, "loss": 0.3154, "num_tokens": 1088729140.0, "step": 5706 }, { "epoch": 1.9479433350401092, "grad_norm": 0.22775668442994218, "learning_rate": 1.9480273141122916e-05, "loss": 0.3058, "num_tokens": 1088919978.0, "step": 5707 }, { "epoch": 1.948284690220174, "grad_norm": 0.19347102049610249, "learning_rate": 1.947395042994436e-05, "loss": 0.3039, "num_tokens": 1089108965.0, "step": 5708 }, { "epoch": 1.948626045400239, "grad_norm": 0.20328737165120797, "learning_rate": 1.9467627718765806e-05, "loss": 0.3094, "num_tokens": 1089325660.0, "step": 5709 }, { "epoch": 1.9489674005803037, "grad_norm": 0.21180330217650897, "learning_rate": 1.9461305007587253e-05, "loss": 0.2839, "num_tokens": 1089511277.0, "step": 5710 }, { "epoch": 1.9493087557603688, "grad_norm": 0.21306060488572554, "learning_rate": 1.94549822964087e-05, "loss": 0.3021, "num_tokens": 1089715649.0, "step": 5711 }, { "epoch": 1.9496501109404334, "grad_norm": 0.19752071847813693, "learning_rate": 1.9448659585230147e-05, "loss": 0.2858, "num_tokens": 1089908770.0, "step": 5712 }, { "epoch": 1.9499914661204985, "grad_norm": 0.22886754172675547, "learning_rate": 1.9442336874051594e-05, "loss": 0.3024, "num_tokens": 1090092504.0, "step": 5713 }, { "epoch": 1.9503328213005633, "grad_norm": 0.2156096038883154, "learning_rate": 1.943601416287304e-05, "loss": 0.2789, "num_tokens": 1090296441.0, "step": 5714 }, { "epoch": 1.9506741764806281, "grad_norm": 0.2137200644927324, "learning_rate": 1.942969145169449e-05, "loss": 0.3095, "num_tokens": 1090473366.0, "step": 5715 }, { "epoch": 1.951015531660693, "grad_norm": 0.2194205443752252, "learning_rate": 1.9423368740515935e-05, "loss": 0.3344, "num_tokens": 1090675123.0, "step": 5716 }, { "epoch": 1.9513568868407578, "grad_norm": 0.22481060706469388, "learning_rate": 1.941704602933738e-05, "loss": 0.3207, "num_tokens": 1090879026.0, "step": 5717 }, { "epoch": 1.9516982420208226, "grad_norm": 0.21430330918011398, "learning_rate": 1.9410723318158826e-05, "loss": 0.2866, "num_tokens": 1091113184.0, "step": 5718 }, { "epoch": 1.9520395972008875, "grad_norm": 0.20583179084844008, "learning_rate": 1.9404400606980276e-05, "loss": 0.2947, "num_tokens": 1091300281.0, "step": 5719 }, { "epoch": 1.9523809523809523, "grad_norm": 0.2128149902498196, "learning_rate": 1.939807789580172e-05, "loss": 0.3, "num_tokens": 1091498188.0, "step": 5720 }, { "epoch": 1.9527223075610172, "grad_norm": 0.219879652082617, "learning_rate": 1.9391755184623167e-05, "loss": 0.3131, "num_tokens": 1091698099.0, "step": 5721 }, { "epoch": 1.9530636627410822, "grad_norm": 0.2013137601293622, "learning_rate": 1.9385432473444614e-05, "loss": 0.3063, "num_tokens": 1091869429.0, "step": 5722 }, { "epoch": 1.9534050179211468, "grad_norm": 0.244549346036953, "learning_rate": 1.937910976226606e-05, "loss": 0.3073, "num_tokens": 1092031957.0, "step": 5723 }, { "epoch": 1.953746373101212, "grad_norm": 0.25415826914978445, "learning_rate": 1.9372787051087508e-05, "loss": 0.3317, "num_tokens": 1092248977.0, "step": 5724 }, { "epoch": 1.9540877282812765, "grad_norm": 0.2500685627604751, "learning_rate": 1.9366464339908955e-05, "loss": 0.3555, "num_tokens": 1092485551.0, "step": 5725 }, { "epoch": 1.9544290834613416, "grad_norm": 0.20074997039753534, "learning_rate": 1.93601416287304e-05, "loss": 0.2989, "num_tokens": 1092658537.0, "step": 5726 }, { "epoch": 1.9547704386414064, "grad_norm": 0.19779978314430557, "learning_rate": 1.9353818917551845e-05, "loss": 0.329, "num_tokens": 1092888349.0, "step": 5727 }, { "epoch": 1.9551117938214713, "grad_norm": 0.2195407952105543, "learning_rate": 1.9347496206373296e-05, "loss": 0.3118, "num_tokens": 1093106074.0, "step": 5728 }, { "epoch": 1.955453149001536, "grad_norm": 0.18485431761919505, "learning_rate": 1.934117349519474e-05, "loss": 0.2978, "num_tokens": 1093284791.0, "step": 5729 }, { "epoch": 1.955794504181601, "grad_norm": 0.22000585184331342, "learning_rate": 1.9334850784016186e-05, "loss": 0.3163, "num_tokens": 1093494144.0, "step": 5730 }, { "epoch": 1.9561358593616658, "grad_norm": 0.2073265905913307, "learning_rate": 1.9328528072837633e-05, "loss": 0.317, "num_tokens": 1093701063.0, "step": 5731 }, { "epoch": 1.9564772145417306, "grad_norm": 0.2290841855104944, "learning_rate": 1.932220536165908e-05, "loss": 0.3182, "num_tokens": 1093875416.0, "step": 5732 }, { "epoch": 1.9568185697217957, "grad_norm": 0.24526884369992097, "learning_rate": 1.9315882650480527e-05, "loss": 0.2926, "num_tokens": 1094024654.0, "step": 5733 }, { "epoch": 1.9571599249018603, "grad_norm": 0.2336192228013263, "learning_rate": 1.9309559939301974e-05, "loss": 0.3125, "num_tokens": 1094171219.0, "step": 5734 }, { "epoch": 1.9575012800819254, "grad_norm": 0.26939075095600656, "learning_rate": 1.9303237228123418e-05, "loss": 0.2827, "num_tokens": 1094347983.0, "step": 5735 }, { "epoch": 1.95784263526199, "grad_norm": 0.21152862167734673, "learning_rate": 1.9296914516944868e-05, "loss": 0.3054, "num_tokens": 1094515109.0, "step": 5736 }, { "epoch": 1.958183990442055, "grad_norm": 0.20282795457956002, "learning_rate": 1.9290591805766315e-05, "loss": 0.2989, "num_tokens": 1094715415.0, "step": 5737 }, { "epoch": 1.9585253456221197, "grad_norm": 0.21532433304863163, "learning_rate": 1.928426909458776e-05, "loss": 0.3243, "num_tokens": 1094928665.0, "step": 5738 }, { "epoch": 1.9588667008021847, "grad_norm": 0.23076800215740598, "learning_rate": 1.9277946383409206e-05, "loss": 0.2694, "num_tokens": 1095095013.0, "step": 5739 }, { "epoch": 1.9592080559822496, "grad_norm": 0.20194449129602807, "learning_rate": 1.9271623672230653e-05, "loss": 0.2893, "num_tokens": 1095282852.0, "step": 5740 }, { "epoch": 1.9595494111623144, "grad_norm": 0.20711351470027065, "learning_rate": 1.92653009610521e-05, "loss": 0.301, "num_tokens": 1095501735.0, "step": 5741 }, { "epoch": 1.9598907663423792, "grad_norm": 0.24654896407860238, "learning_rate": 1.9258978249873547e-05, "loss": 0.291, "num_tokens": 1095650844.0, "step": 5742 }, { "epoch": 1.960232121522444, "grad_norm": 0.21714162239970974, "learning_rate": 1.9252655538694994e-05, "loss": 0.2821, "num_tokens": 1095845886.0, "step": 5743 }, { "epoch": 1.960573476702509, "grad_norm": 0.26782630693857445, "learning_rate": 1.9246332827516437e-05, "loss": 0.2916, "num_tokens": 1096055895.0, "step": 5744 }, { "epoch": 1.9609148318825738, "grad_norm": 0.19869239185507462, "learning_rate": 1.9240010116337888e-05, "loss": 0.3163, "num_tokens": 1096301436.0, "step": 5745 }, { "epoch": 1.9612561870626388, "grad_norm": 0.2350993316723834, "learning_rate": 1.9233687405159335e-05, "loss": 0.3094, "num_tokens": 1096485305.0, "step": 5746 }, { "epoch": 1.9615975422427034, "grad_norm": 0.21464657402560072, "learning_rate": 1.9227364693980778e-05, "loss": 0.319, "num_tokens": 1096658630.0, "step": 5747 }, { "epoch": 1.9619388974227685, "grad_norm": 0.21884345462444285, "learning_rate": 1.9221041982802225e-05, "loss": 0.2859, "num_tokens": 1096902246.0, "step": 5748 }, { "epoch": 1.9622802526028331, "grad_norm": 0.2001577774373423, "learning_rate": 1.9214719271623676e-05, "loss": 0.3236, "num_tokens": 1097124009.0, "step": 5749 }, { "epoch": 1.9626216077828982, "grad_norm": 0.2094595941348024, "learning_rate": 1.920839656044512e-05, "loss": 0.2637, "num_tokens": 1097282735.0, "step": 5750 }, { "epoch": 1.9629629629629628, "grad_norm": 0.20454216998437577, "learning_rate": 1.9202073849266566e-05, "loss": 0.3455, "num_tokens": 1097532125.0, "step": 5751 }, { "epoch": 1.9633043181430279, "grad_norm": 0.2151122573229243, "learning_rate": 1.9195751138088013e-05, "loss": 0.292, "num_tokens": 1097689840.0, "step": 5752 }, { "epoch": 1.9636456733230927, "grad_norm": 0.20395385342840178, "learning_rate": 1.918942842690946e-05, "loss": 0.271, "num_tokens": 1097880295.0, "step": 5753 }, { "epoch": 1.9639870285031575, "grad_norm": 0.20612193433862794, "learning_rate": 1.9183105715730907e-05, "loss": 0.2834, "num_tokens": 1098080793.0, "step": 5754 }, { "epoch": 1.9643283836832224, "grad_norm": 0.22643184439117545, "learning_rate": 1.9176783004552354e-05, "loss": 0.305, "num_tokens": 1098300443.0, "step": 5755 }, { "epoch": 1.9646697388632872, "grad_norm": 0.19665214799092592, "learning_rate": 1.9170460293373798e-05, "loss": 0.3137, "num_tokens": 1098526868.0, "step": 5756 }, { "epoch": 1.965011094043352, "grad_norm": 0.22173977850806456, "learning_rate": 1.9164137582195245e-05, "loss": 0.3028, "num_tokens": 1098699922.0, "step": 5757 }, { "epoch": 1.965352449223417, "grad_norm": 0.23612308069046578, "learning_rate": 1.9157814871016695e-05, "loss": 0.284, "num_tokens": 1098890311.0, "step": 5758 }, { "epoch": 1.965693804403482, "grad_norm": 0.24222205263507562, "learning_rate": 1.915149215983814e-05, "loss": 0.2949, "num_tokens": 1099042657.0, "step": 5759 }, { "epoch": 1.9660351595835466, "grad_norm": 0.2001767948833209, "learning_rate": 1.9145169448659586e-05, "loss": 0.2805, "num_tokens": 1099251928.0, "step": 5760 }, { "epoch": 1.9663765147636116, "grad_norm": 0.2595700075427272, "learning_rate": 1.9138846737481033e-05, "loss": 0.3148, "num_tokens": 1099411312.0, "step": 5761 }, { "epoch": 1.9667178699436763, "grad_norm": 0.2041915890856957, "learning_rate": 1.913252402630248e-05, "loss": 0.3105, "num_tokens": 1099602607.0, "step": 5762 }, { "epoch": 1.9670592251237413, "grad_norm": 0.23826265901870428, "learning_rate": 1.9126201315123927e-05, "loss": 0.2845, "num_tokens": 1099782555.0, "step": 5763 }, { "epoch": 1.9674005803038062, "grad_norm": 0.20211744451724342, "learning_rate": 1.9119878603945374e-05, "loss": 0.2912, "num_tokens": 1099966997.0, "step": 5764 }, { "epoch": 1.967741935483871, "grad_norm": 0.2227503798763363, "learning_rate": 1.9113555892766817e-05, "loss": 0.306, "num_tokens": 1100168905.0, "step": 5765 }, { "epoch": 1.9680832906639358, "grad_norm": 0.19234980178899586, "learning_rate": 1.9107233181588268e-05, "loss": 0.3065, "num_tokens": 1100370567.0, "step": 5766 }, { "epoch": 1.9684246458440007, "grad_norm": 0.20860834518202492, "learning_rate": 1.9100910470409715e-05, "loss": 0.3041, "num_tokens": 1100519114.0, "step": 5767 }, { "epoch": 1.9687660010240655, "grad_norm": 0.2300581513346563, "learning_rate": 1.9094587759231158e-05, "loss": 0.2913, "num_tokens": 1100701237.0, "step": 5768 }, { "epoch": 1.9691073562041304, "grad_norm": 0.20317597040818114, "learning_rate": 1.9088265048052605e-05, "loss": 0.2803, "num_tokens": 1100883619.0, "step": 5769 }, { "epoch": 1.9694487113841954, "grad_norm": 0.20500891062453336, "learning_rate": 1.9081942336874052e-05, "loss": 0.3044, "num_tokens": 1101048511.0, "step": 5770 }, { "epoch": 1.96979006656426, "grad_norm": 0.21773711285537642, "learning_rate": 1.90756196256955e-05, "loss": 0.3171, "num_tokens": 1101270207.0, "step": 5771 }, { "epoch": 1.970131421744325, "grad_norm": 0.20581770988875261, "learning_rate": 1.9069296914516946e-05, "loss": 0.3115, "num_tokens": 1101468677.0, "step": 5772 }, { "epoch": 1.9704727769243897, "grad_norm": 0.26496910956885944, "learning_rate": 1.9062974203338393e-05, "loss": 0.3149, "num_tokens": 1101675061.0, "step": 5773 }, { "epoch": 1.9708141321044548, "grad_norm": 0.1952149121515671, "learning_rate": 1.9056651492159837e-05, "loss": 0.2979, "num_tokens": 1101846769.0, "step": 5774 }, { "epoch": 1.9711554872845194, "grad_norm": 0.2203480327632915, "learning_rate": 1.9050328780981287e-05, "loss": 0.2831, "num_tokens": 1102046118.0, "step": 5775 }, { "epoch": 1.9714968424645845, "grad_norm": 0.20324080116737406, "learning_rate": 1.9044006069802734e-05, "loss": 0.2826, "num_tokens": 1102227700.0, "step": 5776 }, { "epoch": 1.9718381976446493, "grad_norm": 0.20337705457683497, "learning_rate": 1.9037683358624178e-05, "loss": 0.3127, "num_tokens": 1102463092.0, "step": 5777 }, { "epoch": 1.9721795528247141, "grad_norm": 0.19515279995184523, "learning_rate": 1.9031360647445625e-05, "loss": 0.3099, "num_tokens": 1102686915.0, "step": 5778 }, { "epoch": 1.972520908004779, "grad_norm": 0.21994934633116373, "learning_rate": 1.9025037936267075e-05, "loss": 0.3316, "num_tokens": 1102873394.0, "step": 5779 }, { "epoch": 1.9728622631848438, "grad_norm": 0.24077173460250978, "learning_rate": 1.901871522508852e-05, "loss": 0.3122, "num_tokens": 1103061444.0, "step": 5780 }, { "epoch": 1.9732036183649087, "grad_norm": 0.194185664665454, "learning_rate": 1.9012392513909966e-05, "loss": 0.2846, "num_tokens": 1103217785.0, "step": 5781 }, { "epoch": 1.9735449735449735, "grad_norm": 0.22104033984766436, "learning_rate": 1.9006069802731412e-05, "loss": 0.3076, "num_tokens": 1103438654.0, "step": 5782 }, { "epoch": 1.9738863287250386, "grad_norm": 0.22824806583742424, "learning_rate": 1.899974709155286e-05, "loss": 0.3033, "num_tokens": 1103629643.0, "step": 5783 }, { "epoch": 1.9742276839051032, "grad_norm": 0.2095618166075831, "learning_rate": 1.8993424380374306e-05, "loss": 0.2885, "num_tokens": 1103814061.0, "step": 5784 }, { "epoch": 1.9745690390851682, "grad_norm": 0.21139382923244304, "learning_rate": 1.8987101669195753e-05, "loss": 0.2762, "num_tokens": 1103983431.0, "step": 5785 }, { "epoch": 1.9749103942652328, "grad_norm": 0.18202907623442863, "learning_rate": 1.8980778958017197e-05, "loss": 0.3024, "num_tokens": 1104166407.0, "step": 5786 }, { "epoch": 1.975251749445298, "grad_norm": 0.2543013827661824, "learning_rate": 1.8974456246838644e-05, "loss": 0.3077, "num_tokens": 1104351420.0, "step": 5787 }, { "epoch": 1.9755931046253625, "grad_norm": 0.23370995470367914, "learning_rate": 1.8968133535660094e-05, "loss": 0.2861, "num_tokens": 1104549759.0, "step": 5788 }, { "epoch": 1.9759344598054276, "grad_norm": 0.20106501087898226, "learning_rate": 1.8961810824481538e-05, "loss": 0.3026, "num_tokens": 1104724103.0, "step": 5789 }, { "epoch": 1.9762758149854924, "grad_norm": 0.19996849216809895, "learning_rate": 1.8955488113302985e-05, "loss": 0.2949, "num_tokens": 1104932669.0, "step": 5790 }, { "epoch": 1.9766171701655573, "grad_norm": 0.20707338905440725, "learning_rate": 1.8949165402124432e-05, "loss": 0.3221, "num_tokens": 1105133553.0, "step": 5791 }, { "epoch": 1.976958525345622, "grad_norm": 0.20315953093573877, "learning_rate": 1.894284269094588e-05, "loss": 0.3156, "num_tokens": 1105354008.0, "step": 5792 }, { "epoch": 1.977299880525687, "grad_norm": 0.2273945267374633, "learning_rate": 1.8936519979767326e-05, "loss": 0.298, "num_tokens": 1105534391.0, "step": 5793 }, { "epoch": 1.9776412357057518, "grad_norm": 0.19495075707538423, "learning_rate": 1.8930197268588773e-05, "loss": 0.2944, "num_tokens": 1105683481.0, "step": 5794 }, { "epoch": 1.9779825908858166, "grad_norm": 0.2269893920508091, "learning_rate": 1.8923874557410216e-05, "loss": 0.2907, "num_tokens": 1105895622.0, "step": 5795 }, { "epoch": 1.9783239460658817, "grad_norm": 0.2064948384275788, "learning_rate": 1.8917551846231667e-05, "loss": 0.2948, "num_tokens": 1106096306.0, "step": 5796 }, { "epoch": 1.9786653012459463, "grad_norm": 0.22317238842987785, "learning_rate": 1.8911229135053114e-05, "loss": 0.2612, "num_tokens": 1106236678.0, "step": 5797 }, { "epoch": 1.9790066564260114, "grad_norm": 0.22304327424677395, "learning_rate": 1.8904906423874557e-05, "loss": 0.3017, "num_tokens": 1106436026.0, "step": 5798 }, { "epoch": 1.979348011606076, "grad_norm": 0.19513536583854477, "learning_rate": 1.8898583712696004e-05, "loss": 0.2971, "num_tokens": 1106625323.0, "step": 5799 }, { "epoch": 1.979689366786141, "grad_norm": 0.2097507756716577, "learning_rate": 1.889226100151745e-05, "loss": 0.3111, "num_tokens": 1106839045.0, "step": 5800 }, { "epoch": 1.9800307219662059, "grad_norm": 0.18953642402185938, "learning_rate": 1.88859382903389e-05, "loss": 0.3051, "num_tokens": 1107032768.0, "step": 5801 }, { "epoch": 1.9803720771462707, "grad_norm": 0.2305432406316404, "learning_rate": 1.8879615579160345e-05, "loss": 0.3299, "num_tokens": 1107221099.0, "step": 5802 }, { "epoch": 1.9807134323263356, "grad_norm": 0.20908129021964167, "learning_rate": 1.8873292867981792e-05, "loss": 0.3204, "num_tokens": 1107428781.0, "step": 5803 }, { "epoch": 1.9810547875064004, "grad_norm": 0.23090495170295913, "learning_rate": 1.8866970156803236e-05, "loss": 0.2691, "num_tokens": 1107626571.0, "step": 5804 }, { "epoch": 1.9813961426864652, "grad_norm": 0.203962565458321, "learning_rate": 1.8860647445624686e-05, "loss": 0.3157, "num_tokens": 1107774709.0, "step": 5805 }, { "epoch": 1.98173749786653, "grad_norm": 0.2409192029570199, "learning_rate": 1.8854324734446133e-05, "loss": 0.3169, "num_tokens": 1107995373.0, "step": 5806 }, { "epoch": 1.9820788530465951, "grad_norm": 0.22567377680517187, "learning_rate": 1.8848002023267577e-05, "loss": 0.2995, "num_tokens": 1108188456.0, "step": 5807 }, { "epoch": 1.9824202082266598, "grad_norm": 0.2044482870447948, "learning_rate": 1.8841679312089024e-05, "loss": 0.323, "num_tokens": 1108448365.0, "step": 5808 }, { "epoch": 1.9827615634067248, "grad_norm": 0.20148391860825063, "learning_rate": 1.8835356600910474e-05, "loss": 0.3088, "num_tokens": 1108595438.0, "step": 5809 }, { "epoch": 1.9831029185867894, "grad_norm": 0.21284786797659194, "learning_rate": 1.8829033889731918e-05, "loss": 0.2688, "num_tokens": 1108777270.0, "step": 5810 }, { "epoch": 1.9834442737668545, "grad_norm": 0.21519424867453385, "learning_rate": 1.8822711178553365e-05, "loss": 0.3115, "num_tokens": 1108988221.0, "step": 5811 }, { "epoch": 1.9837856289469191, "grad_norm": 0.21509175731025926, "learning_rate": 1.8816388467374812e-05, "loss": 0.2973, "num_tokens": 1109156476.0, "step": 5812 }, { "epoch": 1.9841269841269842, "grad_norm": 0.19827918428879446, "learning_rate": 1.881006575619626e-05, "loss": 0.3004, "num_tokens": 1109358571.0, "step": 5813 }, { "epoch": 1.984468339307049, "grad_norm": 0.2862945928113696, "learning_rate": 1.8803743045017706e-05, "loss": 0.2961, "num_tokens": 1109540645.0, "step": 5814 }, { "epoch": 1.9848096944871139, "grad_norm": 0.2050051785602042, "learning_rate": 1.8797420333839153e-05, "loss": 0.3225, "num_tokens": 1109757731.0, "step": 5815 }, { "epoch": 1.9851510496671787, "grad_norm": 0.23931882668252327, "learning_rate": 1.8791097622660596e-05, "loss": 0.3052, "num_tokens": 1109975140.0, "step": 5816 }, { "epoch": 1.9854924048472435, "grad_norm": 0.20120401773132338, "learning_rate": 1.8784774911482043e-05, "loss": 0.3169, "num_tokens": 1110167512.0, "step": 5817 }, { "epoch": 1.9858337600273084, "grad_norm": 0.18532675018092856, "learning_rate": 1.8778452200303494e-05, "loss": 0.2949, "num_tokens": 1110376753.0, "step": 5818 }, { "epoch": 1.9861751152073732, "grad_norm": 0.2323427815069395, "learning_rate": 1.8772129489124937e-05, "loss": 0.2964, "num_tokens": 1110540891.0, "step": 5819 }, { "epoch": 1.9865164703874383, "grad_norm": 0.21679985583821487, "learning_rate": 1.8765806777946384e-05, "loss": 0.3071, "num_tokens": 1110712260.0, "step": 5820 }, { "epoch": 1.986857825567503, "grad_norm": 0.2836975746741429, "learning_rate": 1.875948406676783e-05, "loss": 0.3219, "num_tokens": 1110866150.0, "step": 5821 }, { "epoch": 1.987199180747568, "grad_norm": 0.22090131607433136, "learning_rate": 1.8753161355589278e-05, "loss": 0.3116, "num_tokens": 1111063576.0, "step": 5822 }, { "epoch": 1.9875405359276326, "grad_norm": 0.22125576324977436, "learning_rate": 1.8746838644410725e-05, "loss": 0.2986, "num_tokens": 1111256778.0, "step": 5823 }, { "epoch": 1.9878818911076976, "grad_norm": 0.20173690571184652, "learning_rate": 1.874051593323217e-05, "loss": 0.3059, "num_tokens": 1111432423.0, "step": 5824 }, { "epoch": 1.9882232462877623, "grad_norm": 0.22823169934294912, "learning_rate": 1.8734193222053616e-05, "loss": 0.2809, "num_tokens": 1111611021.0, "step": 5825 }, { "epoch": 1.9885646014678273, "grad_norm": 0.21429437961628223, "learning_rate": 1.8727870510875066e-05, "loss": 0.3393, "num_tokens": 1111810618.0, "step": 5826 }, { "epoch": 1.9889059566478922, "grad_norm": 0.23676058706030598, "learning_rate": 1.872154779969651e-05, "loss": 0.2978, "num_tokens": 1112008382.0, "step": 5827 }, { "epoch": 1.989247311827957, "grad_norm": 0.2057144482083419, "learning_rate": 1.8715225088517957e-05, "loss": 0.2943, "num_tokens": 1112244281.0, "step": 5828 }, { "epoch": 1.9895886670080218, "grad_norm": 0.19296530426755856, "learning_rate": 1.8708902377339404e-05, "loss": 0.3019, "num_tokens": 1112408858.0, "step": 5829 }, { "epoch": 1.9899300221880867, "grad_norm": 0.2349256623858103, "learning_rate": 1.870257966616085e-05, "loss": 0.3095, "num_tokens": 1112615596.0, "step": 5830 }, { "epoch": 1.9902713773681515, "grad_norm": 0.19175775093297728, "learning_rate": 1.8696256954982298e-05, "loss": 0.2736, "num_tokens": 1112809140.0, "step": 5831 }, { "epoch": 1.9906127325482164, "grad_norm": 0.19936950920978053, "learning_rate": 1.8689934243803745e-05, "loss": 0.3086, "num_tokens": 1112996295.0, "step": 5832 }, { "epoch": 1.9909540877282814, "grad_norm": 0.2401070673245837, "learning_rate": 1.8683611532625188e-05, "loss": 0.3128, "num_tokens": 1113204576.0, "step": 5833 }, { "epoch": 1.991295442908346, "grad_norm": 0.2056107968522638, "learning_rate": 1.8677288821446635e-05, "loss": 0.3066, "num_tokens": 1113381779.0, "step": 5834 }, { "epoch": 1.991636798088411, "grad_norm": 0.2125975702844764, "learning_rate": 1.8670966110268086e-05, "loss": 0.2936, "num_tokens": 1113605258.0, "step": 5835 }, { "epoch": 1.9919781532684757, "grad_norm": 0.1940922037255041, "learning_rate": 1.866464339908953e-05, "loss": 0.319, "num_tokens": 1113811119.0, "step": 5836 }, { "epoch": 1.9923195084485408, "grad_norm": 0.23289754754531444, "learning_rate": 1.8658320687910976e-05, "loss": 0.2985, "num_tokens": 1113991072.0, "step": 5837 }, { "epoch": 1.9926608636286056, "grad_norm": 0.20266754890517222, "learning_rate": 1.8651997976732423e-05, "loss": 0.2824, "num_tokens": 1114150673.0, "step": 5838 }, { "epoch": 1.9930022188086705, "grad_norm": 0.22975142741278273, "learning_rate": 1.864567526555387e-05, "loss": 0.2533, "num_tokens": 1114307661.0, "step": 5839 }, { "epoch": 1.9933435739887353, "grad_norm": 0.21667870048173932, "learning_rate": 1.8639352554375317e-05, "loss": 0.2861, "num_tokens": 1114501035.0, "step": 5840 }, { "epoch": 1.9936849291688001, "grad_norm": 0.22269309899947423, "learning_rate": 1.8633029843196764e-05, "loss": 0.2952, "num_tokens": 1114723789.0, "step": 5841 }, { "epoch": 1.994026284348865, "grad_norm": 0.2036653332014523, "learning_rate": 1.8626707132018208e-05, "loss": 0.3336, "num_tokens": 1114901410.0, "step": 5842 }, { "epoch": 1.9943676395289298, "grad_norm": 0.21890146659732987, "learning_rate": 1.8620384420839658e-05, "loss": 0.2904, "num_tokens": 1115124044.0, "step": 5843 }, { "epoch": 1.9947089947089947, "grad_norm": 0.22965422320748569, "learning_rate": 1.8614061709661105e-05, "loss": 0.2796, "num_tokens": 1115298071.0, "step": 5844 }, { "epoch": 1.9950503498890595, "grad_norm": 0.209753945217741, "learning_rate": 1.860773899848255e-05, "loss": 0.2992, "num_tokens": 1115477803.0, "step": 5845 }, { "epoch": 1.9953917050691246, "grad_norm": 0.21729518957510813, "learning_rate": 1.8601416287303996e-05, "loss": 0.2844, "num_tokens": 1115696541.0, "step": 5846 }, { "epoch": 1.9957330602491892, "grad_norm": 0.19145544118464292, "learning_rate": 1.8595093576125443e-05, "loss": 0.3095, "num_tokens": 1115903770.0, "step": 5847 }, { "epoch": 1.9960744154292542, "grad_norm": 0.2180295613507109, "learning_rate": 1.858877086494689e-05, "loss": 0.3101, "num_tokens": 1116100944.0, "step": 5848 }, { "epoch": 1.9964157706093189, "grad_norm": 0.2138241704420768, "learning_rate": 1.8582448153768337e-05, "loss": 0.2846, "num_tokens": 1116292995.0, "step": 5849 }, { "epoch": 1.996757125789384, "grad_norm": 0.2218940419955326, "learning_rate": 1.8576125442589784e-05, "loss": 0.2913, "num_tokens": 1116467434.0, "step": 5850 }, { "epoch": 1.9970984809694488, "grad_norm": 0.21465824974102005, "learning_rate": 1.8569802731411227e-05, "loss": 0.2894, "num_tokens": 1116614514.0, "step": 5851 }, { "epoch": 1.9974398361495136, "grad_norm": 0.2205497643087274, "learning_rate": 1.8563480020232678e-05, "loss": 0.2854, "num_tokens": 1116817813.0, "step": 5852 }, { "epoch": 1.9977811913295784, "grad_norm": 0.2143630813452534, "learning_rate": 1.8557157309054125e-05, "loss": 0.311, "num_tokens": 1116997982.0, "step": 5853 }, { "epoch": 1.9981225465096433, "grad_norm": 0.3197172739448843, "learning_rate": 1.8550834597875568e-05, "loss": 0.3195, "num_tokens": 1117203741.0, "step": 5854 }, { "epoch": 1.998463901689708, "grad_norm": 0.2217315703559094, "learning_rate": 1.8544511886697015e-05, "loss": 0.2815, "num_tokens": 1117352001.0, "step": 5855 }, { "epoch": 1.998805256869773, "grad_norm": 0.21554485953382777, "learning_rate": 1.8538189175518466e-05, "loss": 0.3046, "num_tokens": 1117550068.0, "step": 5856 }, { "epoch": 1.999146612049838, "grad_norm": 0.22169914917541236, "learning_rate": 1.853186646433991e-05, "loss": 0.3002, "num_tokens": 1117731942.0, "step": 5857 }, { "epoch": 1.9994879672299026, "grad_norm": 0.21147738779738312, "learning_rate": 1.8525543753161356e-05, "loss": 0.2724, "num_tokens": 1117899255.0, "step": 5858 }, { "epoch": 1.9998293224099677, "grad_norm": 0.23618292730672447, "learning_rate": 1.8519221041982803e-05, "loss": 0.3138, "num_tokens": 1118079611.0, "step": 5859 }, { "epoch": 2.0, "grad_norm": 0.4237456794083294, "learning_rate": 1.851289833080425e-05, "loss": 0.2628, "num_tokens": 1118139570.0, "step": 5860 }, { "epoch": 2.000341355180065, "grad_norm": 0.2949082753973559, "learning_rate": 1.8506575619625697e-05, "loss": 0.2329, "num_tokens": 1118349024.0, "step": 5861 }, { "epoch": 2.0006827103601297, "grad_norm": 0.26062290298624263, "learning_rate": 1.8500252908447144e-05, "loss": 0.2339, "num_tokens": 1118528254.0, "step": 5862 }, { "epoch": 2.0010240655401947, "grad_norm": 0.2578792394907295, "learning_rate": 1.8493930197268588e-05, "loss": 0.2423, "num_tokens": 1118721441.0, "step": 5863 }, { "epoch": 2.0013654207202594, "grad_norm": 0.3183266510284993, "learning_rate": 1.8487607486090035e-05, "loss": 0.2352, "num_tokens": 1118923465.0, "step": 5864 }, { "epoch": 2.0017067759003244, "grad_norm": 0.28804996613602984, "learning_rate": 1.8481284774911485e-05, "loss": 0.2393, "num_tokens": 1119140204.0, "step": 5865 }, { "epoch": 2.002048131080389, "grad_norm": 0.23333715049290804, "learning_rate": 1.847496206373293e-05, "loss": 0.2365, "num_tokens": 1119318053.0, "step": 5866 }, { "epoch": 2.002389486260454, "grad_norm": 0.24163776206832133, "learning_rate": 1.8468639352554376e-05, "loss": 0.2235, "num_tokens": 1119505714.0, "step": 5867 }, { "epoch": 2.0027308414405187, "grad_norm": 0.25480584328757905, "learning_rate": 1.8462316641375823e-05, "loss": 0.2222, "num_tokens": 1119698943.0, "step": 5868 }, { "epoch": 2.003072196620584, "grad_norm": 0.2503350055604555, "learning_rate": 1.845599393019727e-05, "loss": 0.2594, "num_tokens": 1119909109.0, "step": 5869 }, { "epoch": 2.0034135518006484, "grad_norm": 0.24564255940481566, "learning_rate": 1.8449671219018716e-05, "loss": 0.2214, "num_tokens": 1120082675.0, "step": 5870 }, { "epoch": 2.0037549069807135, "grad_norm": 0.2529986379108557, "learning_rate": 1.8443348507840163e-05, "loss": 0.2241, "num_tokens": 1120263979.0, "step": 5871 }, { "epoch": 2.004096262160778, "grad_norm": 0.2720183126301052, "learning_rate": 1.8437025796661607e-05, "loss": 0.2182, "num_tokens": 1120436905.0, "step": 5872 }, { "epoch": 2.004437617340843, "grad_norm": 0.2500112446136419, "learning_rate": 1.8430703085483057e-05, "loss": 0.2425, "num_tokens": 1120644721.0, "step": 5873 }, { "epoch": 2.004778972520908, "grad_norm": 0.24245321420418747, "learning_rate": 1.8424380374304504e-05, "loss": 0.2281, "num_tokens": 1120839946.0, "step": 5874 }, { "epoch": 2.005120327700973, "grad_norm": 0.25901598176594787, "learning_rate": 1.8418057663125948e-05, "loss": 0.2082, "num_tokens": 1121001449.0, "step": 5875 }, { "epoch": 2.005461682881038, "grad_norm": 0.2536907500695624, "learning_rate": 1.8411734951947395e-05, "loss": 0.2266, "num_tokens": 1121206476.0, "step": 5876 }, { "epoch": 2.0058030380611025, "grad_norm": 0.22170689954640446, "learning_rate": 1.8405412240768842e-05, "loss": 0.2213, "num_tokens": 1121397741.0, "step": 5877 }, { "epoch": 2.0061443932411676, "grad_norm": 0.2562278837346855, "learning_rate": 1.839908952959029e-05, "loss": 0.2247, "num_tokens": 1121567739.0, "step": 5878 }, { "epoch": 2.006485748421232, "grad_norm": 0.24363320361511437, "learning_rate": 1.8392766818411736e-05, "loss": 0.222, "num_tokens": 1121752195.0, "step": 5879 }, { "epoch": 2.0068271036012972, "grad_norm": 0.27411130170048414, "learning_rate": 1.8386444107233183e-05, "loss": 0.2305, "num_tokens": 1121970418.0, "step": 5880 }, { "epoch": 2.007168458781362, "grad_norm": 0.23574950816613635, "learning_rate": 1.8380121396054627e-05, "loss": 0.2205, "num_tokens": 1122146744.0, "step": 5881 }, { "epoch": 2.007509813961427, "grad_norm": 0.23940421056876654, "learning_rate": 1.8373798684876077e-05, "loss": 0.2246, "num_tokens": 1122340942.0, "step": 5882 }, { "epoch": 2.0078511691414915, "grad_norm": 0.24557996059811457, "learning_rate": 1.8367475973697524e-05, "loss": 0.2244, "num_tokens": 1122539319.0, "step": 5883 }, { "epoch": 2.0081925243215566, "grad_norm": 0.23372564349799826, "learning_rate": 1.8361153262518967e-05, "loss": 0.2098, "num_tokens": 1122718198.0, "step": 5884 }, { "epoch": 2.0085338795016217, "grad_norm": 0.22801437286663526, "learning_rate": 1.8354830551340414e-05, "loss": 0.2189, "num_tokens": 1122905254.0, "step": 5885 }, { "epoch": 2.0088752346816863, "grad_norm": 0.22793033044180747, "learning_rate": 1.8348507840161865e-05, "loss": 0.2355, "num_tokens": 1123113391.0, "step": 5886 }, { "epoch": 2.0092165898617513, "grad_norm": 0.22121203032337985, "learning_rate": 1.834218512898331e-05, "loss": 0.2236, "num_tokens": 1123289209.0, "step": 5887 }, { "epoch": 2.009557945041816, "grad_norm": 0.2187195661899972, "learning_rate": 1.8335862417804755e-05, "loss": 0.2277, "num_tokens": 1123506208.0, "step": 5888 }, { "epoch": 2.009899300221881, "grad_norm": 0.531555567349158, "learning_rate": 1.8329539706626202e-05, "loss": 0.2419, "num_tokens": 1123751228.0, "step": 5889 }, { "epoch": 2.0102406554019456, "grad_norm": 0.22169712201540653, "learning_rate": 1.832321699544765e-05, "loss": 0.24, "num_tokens": 1123960544.0, "step": 5890 }, { "epoch": 2.0105820105820107, "grad_norm": 0.24493116193573525, "learning_rate": 1.8316894284269096e-05, "loss": 0.2372, "num_tokens": 1124143376.0, "step": 5891 }, { "epoch": 2.0109233657620753, "grad_norm": 0.22966332111042959, "learning_rate": 1.8310571573090543e-05, "loss": 0.2494, "num_tokens": 1124342654.0, "step": 5892 }, { "epoch": 2.0112647209421404, "grad_norm": 0.2346968697411093, "learning_rate": 1.8304248861911987e-05, "loss": 0.2221, "num_tokens": 1124528843.0, "step": 5893 }, { "epoch": 2.011606076122205, "grad_norm": 0.2304295093300021, "learning_rate": 1.8297926150733434e-05, "loss": 0.2071, "num_tokens": 1124698353.0, "step": 5894 }, { "epoch": 2.01194743130227, "grad_norm": 0.21429533072583767, "learning_rate": 1.8291603439554884e-05, "loss": 0.2268, "num_tokens": 1124912896.0, "step": 5895 }, { "epoch": 2.0122887864823347, "grad_norm": 0.2400197703461521, "learning_rate": 1.8285280728376328e-05, "loss": 0.2115, "num_tokens": 1125091903.0, "step": 5896 }, { "epoch": 2.0126301416623997, "grad_norm": 0.2099608403057006, "learning_rate": 1.8278958017197775e-05, "loss": 0.251, "num_tokens": 1125328323.0, "step": 5897 }, { "epoch": 2.012971496842465, "grad_norm": 0.23480232862075484, "learning_rate": 1.8272635306019222e-05, "loss": 0.2344, "num_tokens": 1125531329.0, "step": 5898 }, { "epoch": 2.0133128520225294, "grad_norm": 0.2524418721622958, "learning_rate": 1.826631259484067e-05, "loss": 0.2448, "num_tokens": 1125728533.0, "step": 5899 }, { "epoch": 2.0136542072025945, "grad_norm": 0.2746202589094135, "learning_rate": 1.8259989883662116e-05, "loss": 0.2361, "num_tokens": 1125902458.0, "step": 5900 }, { "epoch": 2.013995562382659, "grad_norm": 0.2751604172605141, "learning_rate": 1.8253667172483563e-05, "loss": 0.2325, "num_tokens": 1126055159.0, "step": 5901 }, { "epoch": 2.014336917562724, "grad_norm": 0.23612184288371366, "learning_rate": 1.8247344461305006e-05, "loss": 0.2271, "num_tokens": 1126250932.0, "step": 5902 }, { "epoch": 2.0146782727427888, "grad_norm": 0.21911025195894956, "learning_rate": 1.8241021750126457e-05, "loss": 0.2358, "num_tokens": 1126453566.0, "step": 5903 }, { "epoch": 2.015019627922854, "grad_norm": 0.2535708732156875, "learning_rate": 1.8234699038947904e-05, "loss": 0.2218, "num_tokens": 1126655230.0, "step": 5904 }, { "epoch": 2.0153609831029184, "grad_norm": 0.2278393097655069, "learning_rate": 1.8228376327769347e-05, "loss": 0.2329, "num_tokens": 1126877547.0, "step": 5905 }, { "epoch": 2.0157023382829835, "grad_norm": 0.2423235896248264, "learning_rate": 1.8222053616590794e-05, "loss": 0.2106, "num_tokens": 1127074704.0, "step": 5906 }, { "epoch": 2.016043693463048, "grad_norm": 0.2689637800137262, "learning_rate": 1.821573090541224e-05, "loss": 0.2506, "num_tokens": 1127300879.0, "step": 5907 }, { "epoch": 2.016385048643113, "grad_norm": 0.21845627827427777, "learning_rate": 1.8209408194233688e-05, "loss": 0.2083, "num_tokens": 1127486468.0, "step": 5908 }, { "epoch": 2.016726403823178, "grad_norm": 0.22560143317777182, "learning_rate": 1.8203085483055135e-05, "loss": 0.2227, "num_tokens": 1127676582.0, "step": 5909 }, { "epoch": 2.017067759003243, "grad_norm": 0.23911647538946704, "learning_rate": 1.8196762771876582e-05, "loss": 0.2136, "num_tokens": 1127851222.0, "step": 5910 }, { "epoch": 2.017409114183308, "grad_norm": 0.2769312851214127, "learning_rate": 1.8190440060698026e-05, "loss": 0.2148, "num_tokens": 1128000828.0, "step": 5911 }, { "epoch": 2.0177504693633725, "grad_norm": 0.22878817027835197, "learning_rate": 1.8184117349519476e-05, "loss": 0.2314, "num_tokens": 1128194857.0, "step": 5912 }, { "epoch": 2.0180918245434376, "grad_norm": 0.28365524236186407, "learning_rate": 1.8177794638340923e-05, "loss": 0.2208, "num_tokens": 1128326574.0, "step": 5913 }, { "epoch": 2.0184331797235022, "grad_norm": 0.22880402262684296, "learning_rate": 1.8171471927162367e-05, "loss": 0.2074, "num_tokens": 1128522348.0, "step": 5914 }, { "epoch": 2.0187745349035673, "grad_norm": 0.22693468227581562, "learning_rate": 1.8165149215983814e-05, "loss": 0.238, "num_tokens": 1128719487.0, "step": 5915 }, { "epoch": 2.019115890083632, "grad_norm": 0.2634258598292285, "learning_rate": 1.8158826504805264e-05, "loss": 0.2235, "num_tokens": 1128873146.0, "step": 5916 }, { "epoch": 2.019457245263697, "grad_norm": 0.24549196510244922, "learning_rate": 1.8152503793626708e-05, "loss": 0.2179, "num_tokens": 1129044767.0, "step": 5917 }, { "epoch": 2.0197986004437616, "grad_norm": 0.22924830104967275, "learning_rate": 1.8146181082448155e-05, "loss": 0.2296, "num_tokens": 1129230548.0, "step": 5918 }, { "epoch": 2.0201399556238266, "grad_norm": 0.26248223778102026, "learning_rate": 1.8139858371269602e-05, "loss": 0.2163, "num_tokens": 1129378864.0, "step": 5919 }, { "epoch": 2.0204813108038913, "grad_norm": 0.2281188925042923, "learning_rate": 1.813353566009105e-05, "loss": 0.2283, "num_tokens": 1129575311.0, "step": 5920 }, { "epoch": 2.0208226659839563, "grad_norm": 0.24889876019278703, "learning_rate": 1.8127212948912496e-05, "loss": 0.2446, "num_tokens": 1129777797.0, "step": 5921 }, { "epoch": 2.0211640211640214, "grad_norm": 0.21737898745483547, "learning_rate": 1.8120890237733943e-05, "loss": 0.239, "num_tokens": 1129996488.0, "step": 5922 }, { "epoch": 2.021505376344086, "grad_norm": 0.21463185911448904, "learning_rate": 1.8114567526555386e-05, "loss": 0.2173, "num_tokens": 1130193865.0, "step": 5923 }, { "epoch": 2.021846731524151, "grad_norm": 0.22694769666828862, "learning_rate": 1.8108244815376833e-05, "loss": 0.2319, "num_tokens": 1130407720.0, "step": 5924 }, { "epoch": 2.0221880867042157, "grad_norm": 0.24444270561945408, "learning_rate": 1.8101922104198284e-05, "loss": 0.2221, "num_tokens": 1130617682.0, "step": 5925 }, { "epoch": 2.0225294418842807, "grad_norm": 0.24103089091368915, "learning_rate": 1.8095599393019727e-05, "loss": 0.226, "num_tokens": 1130788123.0, "step": 5926 }, { "epoch": 2.0228707970643454, "grad_norm": 0.2460186914948877, "learning_rate": 1.8089276681841174e-05, "loss": 0.2334, "num_tokens": 1130983739.0, "step": 5927 }, { "epoch": 2.0232121522444104, "grad_norm": 0.2510334913495582, "learning_rate": 1.808295397066262e-05, "loss": 0.2566, "num_tokens": 1131178259.0, "step": 5928 }, { "epoch": 2.023553507424475, "grad_norm": 0.2583186584659626, "learning_rate": 1.8076631259484068e-05, "loss": 0.2258, "num_tokens": 1131335807.0, "step": 5929 }, { "epoch": 2.02389486260454, "grad_norm": 0.23356461310728785, "learning_rate": 1.8070308548305515e-05, "loss": 0.2163, "num_tokens": 1131509259.0, "step": 5930 }, { "epoch": 2.0242362177846047, "grad_norm": 0.24311951263053133, "learning_rate": 1.8063985837126962e-05, "loss": 0.2082, "num_tokens": 1131683067.0, "step": 5931 }, { "epoch": 2.02457757296467, "grad_norm": 0.2537163093963958, "learning_rate": 1.8057663125948406e-05, "loss": 0.2214, "num_tokens": 1131852970.0, "step": 5932 }, { "epoch": 2.0249189281447344, "grad_norm": 0.20969474796513157, "learning_rate": 1.8051340414769856e-05, "loss": 0.2147, "num_tokens": 1132071327.0, "step": 5933 }, { "epoch": 2.0252602833247995, "grad_norm": 0.27921468841155106, "learning_rate": 1.8045017703591303e-05, "loss": 0.2412, "num_tokens": 1132251827.0, "step": 5934 }, { "epoch": 2.0256016385048645, "grad_norm": 0.2324444297123428, "learning_rate": 1.8038694992412747e-05, "loss": 0.2159, "num_tokens": 1132428121.0, "step": 5935 }, { "epoch": 2.025942993684929, "grad_norm": 0.2118136983187968, "learning_rate": 1.8032372281234194e-05, "loss": 0.2312, "num_tokens": 1132666821.0, "step": 5936 }, { "epoch": 2.026284348864994, "grad_norm": 0.23519407306497142, "learning_rate": 1.802604957005564e-05, "loss": 0.2148, "num_tokens": 1132847779.0, "step": 5937 }, { "epoch": 2.026625704045059, "grad_norm": 0.2796918671774095, "learning_rate": 1.8019726858877088e-05, "loss": 0.2376, "num_tokens": 1133029597.0, "step": 5938 }, { "epoch": 2.026967059225124, "grad_norm": 0.2321796471491423, "learning_rate": 1.8013404147698535e-05, "loss": 0.2305, "num_tokens": 1133224806.0, "step": 5939 }, { "epoch": 2.0273084144051885, "grad_norm": 0.2310314818518368, "learning_rate": 1.800708143651998e-05, "loss": 0.2423, "num_tokens": 1133450087.0, "step": 5940 }, { "epoch": 2.0276497695852536, "grad_norm": 0.2476282125523714, "learning_rate": 1.8000758725341425e-05, "loss": 0.2442, "num_tokens": 1133654390.0, "step": 5941 }, { "epoch": 2.027991124765318, "grad_norm": 0.2292474041132029, "learning_rate": 1.7994436014162876e-05, "loss": 0.2525, "num_tokens": 1133889242.0, "step": 5942 }, { "epoch": 2.0283324799453832, "grad_norm": 0.24886600638895615, "learning_rate": 1.7988113302984323e-05, "loss": 0.2268, "num_tokens": 1134057837.0, "step": 5943 }, { "epoch": 2.028673835125448, "grad_norm": 0.24115771042462789, "learning_rate": 1.7981790591805766e-05, "loss": 0.2167, "num_tokens": 1134235799.0, "step": 5944 }, { "epoch": 2.029015190305513, "grad_norm": 0.22996355041271882, "learning_rate": 1.7975467880627213e-05, "loss": 0.2145, "num_tokens": 1134410379.0, "step": 5945 }, { "epoch": 2.0293565454855775, "grad_norm": 0.21409197603672325, "learning_rate": 1.796914516944866e-05, "loss": 0.2039, "num_tokens": 1134607422.0, "step": 5946 }, { "epoch": 2.0296979006656426, "grad_norm": 0.24836035504944232, "learning_rate": 1.7962822458270107e-05, "loss": 0.2623, "num_tokens": 1134805976.0, "step": 5947 }, { "epoch": 2.0300392558457077, "grad_norm": 0.23985917173290494, "learning_rate": 1.7956499747091554e-05, "loss": 0.2685, "num_tokens": 1135007840.0, "step": 5948 }, { "epoch": 2.0303806110257723, "grad_norm": 0.24417746779034546, "learning_rate": 1.7950177035913e-05, "loss": 0.2335, "num_tokens": 1135191055.0, "step": 5949 }, { "epoch": 2.0307219662058373, "grad_norm": 0.24437630468144186, "learning_rate": 1.7943854324734448e-05, "loss": 0.239, "num_tokens": 1135403920.0, "step": 5950 }, { "epoch": 2.031063321385902, "grad_norm": 0.23173954743842395, "learning_rate": 1.7937531613555895e-05, "loss": 0.2409, "num_tokens": 1135607106.0, "step": 5951 }, { "epoch": 2.031404676565967, "grad_norm": 0.2308824508280113, "learning_rate": 1.7931208902377342e-05, "loss": 0.2438, "num_tokens": 1135836245.0, "step": 5952 }, { "epoch": 2.0317460317460316, "grad_norm": 0.2198791884013217, "learning_rate": 1.7924886191198786e-05, "loss": 0.2264, "num_tokens": 1136061270.0, "step": 5953 }, { "epoch": 2.0320873869260967, "grad_norm": 0.22964302290468452, "learning_rate": 1.7918563480020233e-05, "loss": 0.2164, "num_tokens": 1136253249.0, "step": 5954 }, { "epoch": 2.0324287421061613, "grad_norm": 0.2304331541556403, "learning_rate": 1.791224076884168e-05, "loss": 0.2317, "num_tokens": 1136465939.0, "step": 5955 }, { "epoch": 2.0327700972862264, "grad_norm": 0.23236928330238654, "learning_rate": 1.7905918057663127e-05, "loss": 0.2259, "num_tokens": 1136652902.0, "step": 5956 }, { "epoch": 2.033111452466291, "grad_norm": 0.22901246619086899, "learning_rate": 1.7899595346484574e-05, "loss": 0.2172, "num_tokens": 1136836494.0, "step": 5957 }, { "epoch": 2.033452807646356, "grad_norm": 0.21261028590784858, "learning_rate": 1.7893272635306017e-05, "loss": 0.2423, "num_tokens": 1137088501.0, "step": 5958 }, { "epoch": 2.033794162826421, "grad_norm": 0.2371341326083987, "learning_rate": 1.7886949924127467e-05, "loss": 0.2301, "num_tokens": 1137253925.0, "step": 5959 }, { "epoch": 2.0341355180064857, "grad_norm": 0.2443547002338845, "learning_rate": 1.7880627212948914e-05, "loss": 0.2239, "num_tokens": 1137427971.0, "step": 5960 }, { "epoch": 2.034476873186551, "grad_norm": 0.2137965153540706, "learning_rate": 1.7874304501770358e-05, "loss": 0.2306, "num_tokens": 1137652038.0, "step": 5961 }, { "epoch": 2.0348182283666154, "grad_norm": 0.23522609323173874, "learning_rate": 1.7867981790591805e-05, "loss": 0.2344, "num_tokens": 1137851360.0, "step": 5962 }, { "epoch": 2.0351595835466805, "grad_norm": 0.24618954158002898, "learning_rate": 1.7861659079413255e-05, "loss": 0.2328, "num_tokens": 1138031428.0, "step": 5963 }, { "epoch": 2.035500938726745, "grad_norm": 0.2404791670938617, "learning_rate": 1.78553363682347e-05, "loss": 0.2349, "num_tokens": 1138216593.0, "step": 5964 }, { "epoch": 2.03584229390681, "grad_norm": 0.2528736937229265, "learning_rate": 1.7849013657056146e-05, "loss": 0.2529, "num_tokens": 1138405473.0, "step": 5965 }, { "epoch": 2.0361836490868748, "grad_norm": 0.21794781638553104, "learning_rate": 1.7842690945877593e-05, "loss": 0.2467, "num_tokens": 1138627808.0, "step": 5966 }, { "epoch": 2.03652500426694, "grad_norm": 0.264524380389823, "learning_rate": 1.783636823469904e-05, "loss": 0.2524, "num_tokens": 1138808603.0, "step": 5967 }, { "epoch": 2.0368663594470044, "grad_norm": 0.2333748671522735, "learning_rate": 1.7830045523520487e-05, "loss": 0.2421, "num_tokens": 1139001606.0, "step": 5968 }, { "epoch": 2.0372077146270695, "grad_norm": 0.20798737701003886, "learning_rate": 1.7823722812341934e-05, "loss": 0.2384, "num_tokens": 1139216851.0, "step": 5969 }, { "epoch": 2.037549069807134, "grad_norm": 0.22206911689685982, "learning_rate": 1.7817400101163378e-05, "loss": 0.2279, "num_tokens": 1139422926.0, "step": 5970 }, { "epoch": 2.037890424987199, "grad_norm": 0.24742064433686187, "learning_rate": 1.7811077389984825e-05, "loss": 0.1966, "num_tokens": 1139574263.0, "step": 5971 }, { "epoch": 2.0382317801672643, "grad_norm": 0.3056501882652474, "learning_rate": 1.7804754678806275e-05, "loss": 0.2455, "num_tokens": 1139719891.0, "step": 5972 }, { "epoch": 2.038573135347329, "grad_norm": 0.23424685227626085, "learning_rate": 1.779843196762772e-05, "loss": 0.2064, "num_tokens": 1139893369.0, "step": 5973 }, { "epoch": 2.038914490527394, "grad_norm": 0.2512162375322461, "learning_rate": 1.7792109256449165e-05, "loss": 0.228, "num_tokens": 1140061455.0, "step": 5974 }, { "epoch": 2.0392558457074585, "grad_norm": 0.25148967903502056, "learning_rate": 1.7785786545270612e-05, "loss": 0.2457, "num_tokens": 1140249705.0, "step": 5975 }, { "epoch": 2.0395972008875236, "grad_norm": 0.22156982751766713, "learning_rate": 1.777946383409206e-05, "loss": 0.2289, "num_tokens": 1140461406.0, "step": 5976 }, { "epoch": 2.0399385560675882, "grad_norm": 0.22429300368841643, "learning_rate": 1.7773141122913506e-05, "loss": 0.231, "num_tokens": 1140653485.0, "step": 5977 }, { "epoch": 2.0402799112476533, "grad_norm": 0.24926739213402382, "learning_rate": 1.7766818411734953e-05, "loss": 0.2397, "num_tokens": 1140848067.0, "step": 5978 }, { "epoch": 2.040621266427718, "grad_norm": 0.23369237058681025, "learning_rate": 1.7760495700556397e-05, "loss": 0.2235, "num_tokens": 1141042256.0, "step": 5979 }, { "epoch": 2.040962621607783, "grad_norm": 0.2284231508860309, "learning_rate": 1.7754172989377847e-05, "loss": 0.1996, "num_tokens": 1141211365.0, "step": 5980 }, { "epoch": 2.0413039767878476, "grad_norm": 0.23357101458687637, "learning_rate": 1.7747850278199294e-05, "loss": 0.2223, "num_tokens": 1141399781.0, "step": 5981 }, { "epoch": 2.0416453319679126, "grad_norm": 0.25072049976388383, "learning_rate": 1.7741527567020738e-05, "loss": 0.2289, "num_tokens": 1141555925.0, "step": 5982 }, { "epoch": 2.0419866871479773, "grad_norm": 0.23460534471506567, "learning_rate": 1.7735204855842185e-05, "loss": 0.2165, "num_tokens": 1141730447.0, "step": 5983 }, { "epoch": 2.0423280423280423, "grad_norm": 0.24316996734550092, "learning_rate": 1.7728882144663632e-05, "loss": 0.2536, "num_tokens": 1141919399.0, "step": 5984 }, { "epoch": 2.0426693975081074, "grad_norm": 0.23040583408884402, "learning_rate": 1.772255943348508e-05, "loss": 0.2601, "num_tokens": 1142137464.0, "step": 5985 }, { "epoch": 2.043010752688172, "grad_norm": 0.24681895611690807, "learning_rate": 1.7716236722306526e-05, "loss": 0.2243, "num_tokens": 1142293470.0, "step": 5986 }, { "epoch": 2.043352107868237, "grad_norm": 0.2277110278773166, "learning_rate": 1.7709914011127973e-05, "loss": 0.2027, "num_tokens": 1142460406.0, "step": 5987 }, { "epoch": 2.0436934630483017, "grad_norm": 0.25980785299699116, "learning_rate": 1.7703591299949416e-05, "loss": 0.2307, "num_tokens": 1142655555.0, "step": 5988 }, { "epoch": 2.0440348182283667, "grad_norm": 0.2318663863288549, "learning_rate": 1.7697268588770867e-05, "loss": 0.2668, "num_tokens": 1142868832.0, "step": 5989 }, { "epoch": 2.0443761734084314, "grad_norm": 0.3656627930643395, "learning_rate": 1.7690945877592314e-05, "loss": 0.2182, "num_tokens": 1143030599.0, "step": 5990 }, { "epoch": 2.0447175285884964, "grad_norm": 0.23164314616596046, "learning_rate": 1.7684623166413757e-05, "loss": 0.2295, "num_tokens": 1143221693.0, "step": 5991 }, { "epoch": 2.045058883768561, "grad_norm": 0.2375144790344711, "learning_rate": 1.7678300455235204e-05, "loss": 0.2337, "num_tokens": 1143410827.0, "step": 5992 }, { "epoch": 2.045400238948626, "grad_norm": 0.24658502361849705, "learning_rate": 1.7671977744056655e-05, "loss": 0.2316, "num_tokens": 1143581656.0, "step": 5993 }, { "epoch": 2.0457415941286907, "grad_norm": 0.261802554221294, "learning_rate": 1.76656550328781e-05, "loss": 0.2174, "num_tokens": 1143747631.0, "step": 5994 }, { "epoch": 2.046082949308756, "grad_norm": 0.24264748349638085, "learning_rate": 1.7659332321699545e-05, "loss": 0.2255, "num_tokens": 1143944721.0, "step": 5995 }, { "epoch": 2.0464243044888204, "grad_norm": 0.22834250926904787, "learning_rate": 1.7653009610520992e-05, "loss": 0.2274, "num_tokens": 1144148337.0, "step": 5996 }, { "epoch": 2.0467656596688855, "grad_norm": 0.2259919997098836, "learning_rate": 1.764668689934244e-05, "loss": 0.2618, "num_tokens": 1144399657.0, "step": 5997 }, { "epoch": 2.0471070148489505, "grad_norm": 0.2318834084864556, "learning_rate": 1.7640364188163886e-05, "loss": 0.2278, "num_tokens": 1144600118.0, "step": 5998 }, { "epoch": 2.047448370029015, "grad_norm": 0.22398523434244128, "learning_rate": 1.7634041476985333e-05, "loss": 0.2441, "num_tokens": 1144811327.0, "step": 5999 }, { "epoch": 2.04778972520908, "grad_norm": 0.2172441819196547, "learning_rate": 1.7627718765806777e-05, "loss": 0.2391, "num_tokens": 1145033734.0, "step": 6000 }, { "epoch": 2.048131080389145, "grad_norm": 0.25806258660914533, "learning_rate": 1.7621396054628224e-05, "loss": 0.245, "num_tokens": 1145210340.0, "step": 6001 }, { "epoch": 2.04847243556921, "grad_norm": 0.23229655484080453, "learning_rate": 1.7615073343449674e-05, "loss": 0.233, "num_tokens": 1145404729.0, "step": 6002 }, { "epoch": 2.0488137907492745, "grad_norm": 0.2084662878726268, "learning_rate": 1.7608750632271118e-05, "loss": 0.2427, "num_tokens": 1145654288.0, "step": 6003 }, { "epoch": 2.0491551459293396, "grad_norm": 0.2314880172755147, "learning_rate": 1.7602427921092565e-05, "loss": 0.2298, "num_tokens": 1145834862.0, "step": 6004 }, { "epoch": 2.049496501109404, "grad_norm": 0.23797416566550922, "learning_rate": 1.7596105209914012e-05, "loss": 0.2365, "num_tokens": 1146020156.0, "step": 6005 }, { "epoch": 2.0498378562894692, "grad_norm": 0.23659735783859265, "learning_rate": 1.758978249873546e-05, "loss": 0.239, "num_tokens": 1146242993.0, "step": 6006 }, { "epoch": 2.050179211469534, "grad_norm": 0.24582368545429695, "learning_rate": 1.7583459787556906e-05, "loss": 0.2296, "num_tokens": 1146421199.0, "step": 6007 }, { "epoch": 2.050520566649599, "grad_norm": 0.23825957672982684, "learning_rate": 1.7577137076378353e-05, "loss": 0.2546, "num_tokens": 1146636399.0, "step": 6008 }, { "epoch": 2.050861921829664, "grad_norm": 0.23043879679883317, "learning_rate": 1.7570814365199796e-05, "loss": 0.2209, "num_tokens": 1146820355.0, "step": 6009 }, { "epoch": 2.0512032770097286, "grad_norm": 0.26407624082951603, "learning_rate": 1.7564491654021247e-05, "loss": 0.2539, "num_tokens": 1146996387.0, "step": 6010 }, { "epoch": 2.0515446321897937, "grad_norm": 0.2159711145109571, "learning_rate": 1.7558168942842694e-05, "loss": 0.2334, "num_tokens": 1147203377.0, "step": 6011 }, { "epoch": 2.0518859873698583, "grad_norm": 0.23833744250811364, "learning_rate": 1.7551846231664137e-05, "loss": 0.2225, "num_tokens": 1147370090.0, "step": 6012 }, { "epoch": 2.0522273425499233, "grad_norm": 0.24239197491906156, "learning_rate": 1.7545523520485584e-05, "loss": 0.2443, "num_tokens": 1147566230.0, "step": 6013 }, { "epoch": 2.052568697729988, "grad_norm": 0.2345975407781412, "learning_rate": 1.753920080930703e-05, "loss": 0.2301, "num_tokens": 1147738214.0, "step": 6014 }, { "epoch": 2.052910052910053, "grad_norm": 0.2682671087101901, "learning_rate": 1.7532878098128478e-05, "loss": 0.2428, "num_tokens": 1147924290.0, "step": 6015 }, { "epoch": 2.0532514080901176, "grad_norm": 0.22247973740200705, "learning_rate": 1.7526555386949925e-05, "loss": 0.2373, "num_tokens": 1148113275.0, "step": 6016 }, { "epoch": 2.0535927632701827, "grad_norm": 0.2515698401420939, "learning_rate": 1.7520232675771372e-05, "loss": 0.2194, "num_tokens": 1148276581.0, "step": 6017 }, { "epoch": 2.0539341184502473, "grad_norm": 0.22177850966234736, "learning_rate": 1.7513909964592816e-05, "loss": 0.2217, "num_tokens": 1148479027.0, "step": 6018 }, { "epoch": 2.0542754736303124, "grad_norm": 0.23113270387013718, "learning_rate": 1.7507587253414266e-05, "loss": 0.2431, "num_tokens": 1148681812.0, "step": 6019 }, { "epoch": 2.054616828810377, "grad_norm": 0.23548156753635513, "learning_rate": 1.7501264542235713e-05, "loss": 0.2179, "num_tokens": 1148864838.0, "step": 6020 }, { "epoch": 2.054958183990442, "grad_norm": 0.2491419523280215, "learning_rate": 1.7494941831057157e-05, "loss": 0.1992, "num_tokens": 1149009262.0, "step": 6021 }, { "epoch": 2.055299539170507, "grad_norm": 0.23852051986091882, "learning_rate": 1.7488619119878604e-05, "loss": 0.2326, "num_tokens": 1149195687.0, "step": 6022 }, { "epoch": 2.0556408943505717, "grad_norm": 0.28295286947021375, "learning_rate": 1.748229640870005e-05, "loss": 0.2526, "num_tokens": 1149371932.0, "step": 6023 }, { "epoch": 2.055982249530637, "grad_norm": 0.22703348888535674, "learning_rate": 1.7475973697521498e-05, "loss": 0.229, "num_tokens": 1149584748.0, "step": 6024 }, { "epoch": 2.0563236047107014, "grad_norm": 0.2772428625439775, "learning_rate": 1.7469650986342945e-05, "loss": 0.261, "num_tokens": 1149794756.0, "step": 6025 }, { "epoch": 2.0566649598907665, "grad_norm": 0.24920966810914233, "learning_rate": 1.746332827516439e-05, "loss": 0.2092, "num_tokens": 1149960934.0, "step": 6026 }, { "epoch": 2.057006315070831, "grad_norm": 0.232351558619952, "learning_rate": 1.745700556398584e-05, "loss": 0.2365, "num_tokens": 1150160767.0, "step": 6027 }, { "epoch": 2.057347670250896, "grad_norm": 0.24606078197530457, "learning_rate": 1.7450682852807286e-05, "loss": 0.2311, "num_tokens": 1150334784.0, "step": 6028 }, { "epoch": 2.0576890254309608, "grad_norm": 0.21717730854383102, "learning_rate": 1.7444360141628733e-05, "loss": 0.2193, "num_tokens": 1150523353.0, "step": 6029 }, { "epoch": 2.058030380611026, "grad_norm": 0.26037626978332745, "learning_rate": 1.7438037430450176e-05, "loss": 0.2694, "num_tokens": 1150731215.0, "step": 6030 }, { "epoch": 2.0583717357910905, "grad_norm": 0.2437674530648422, "learning_rate": 1.7431714719271623e-05, "loss": 0.2403, "num_tokens": 1150923361.0, "step": 6031 }, { "epoch": 2.0587130909711555, "grad_norm": 0.2511614325674246, "learning_rate": 1.7425392008093074e-05, "loss": 0.2203, "num_tokens": 1151108335.0, "step": 6032 }, { "epoch": 2.05905444615122, "grad_norm": 0.23260788838843427, "learning_rate": 1.7419069296914517e-05, "loss": 0.2323, "num_tokens": 1151290797.0, "step": 6033 }, { "epoch": 2.059395801331285, "grad_norm": 0.22974288103913934, "learning_rate": 1.7412746585735964e-05, "loss": 0.2282, "num_tokens": 1151503433.0, "step": 6034 }, { "epoch": 2.0597371565113503, "grad_norm": 0.25913928407858683, "learning_rate": 1.740642387455741e-05, "loss": 0.2028, "num_tokens": 1151666608.0, "step": 6035 }, { "epoch": 2.060078511691415, "grad_norm": 0.2385218174641877, "learning_rate": 1.7400101163378858e-05, "loss": 0.2348, "num_tokens": 1151847391.0, "step": 6036 }, { "epoch": 2.06041986687148, "grad_norm": 0.2347180781400847, "learning_rate": 1.7393778452200305e-05, "loss": 0.2362, "num_tokens": 1152029550.0, "step": 6037 }, { "epoch": 2.0607612220515446, "grad_norm": 0.2554885042203472, "learning_rate": 1.7387455741021752e-05, "loss": 0.2137, "num_tokens": 1152215361.0, "step": 6038 }, { "epoch": 2.0611025772316096, "grad_norm": 0.22657205719431003, "learning_rate": 1.7381133029843196e-05, "loss": 0.2462, "num_tokens": 1152421254.0, "step": 6039 }, { "epoch": 2.0614439324116742, "grad_norm": 0.23664914030283524, "learning_rate": 1.7374810318664646e-05, "loss": 0.2512, "num_tokens": 1152616998.0, "step": 6040 }, { "epoch": 2.0617852875917393, "grad_norm": 0.23386561807138842, "learning_rate": 1.7368487607486093e-05, "loss": 0.2551, "num_tokens": 1152844203.0, "step": 6041 }, { "epoch": 2.062126642771804, "grad_norm": 0.2288219255395414, "learning_rate": 1.7362164896307537e-05, "loss": 0.2342, "num_tokens": 1153060937.0, "step": 6042 }, { "epoch": 2.062467997951869, "grad_norm": 0.2199725763186246, "learning_rate": 1.7355842185128984e-05, "loss": 0.2555, "num_tokens": 1153310205.0, "step": 6043 }, { "epoch": 2.0628093531319336, "grad_norm": 0.20339123289245262, "learning_rate": 1.734951947395043e-05, "loss": 0.2314, "num_tokens": 1153542420.0, "step": 6044 }, { "epoch": 2.0631507083119986, "grad_norm": 0.23522933137783603, "learning_rate": 1.7343196762771878e-05, "loss": 0.2243, "num_tokens": 1153719705.0, "step": 6045 }, { "epoch": 2.0634920634920633, "grad_norm": 0.21901116333647547, "learning_rate": 1.7336874051593325e-05, "loss": 0.2311, "num_tokens": 1153912407.0, "step": 6046 }, { "epoch": 2.0638334186721283, "grad_norm": 0.2145046471559994, "learning_rate": 1.733055134041477e-05, "loss": 0.2339, "num_tokens": 1154119885.0, "step": 6047 }, { "epoch": 2.0641747738521934, "grad_norm": 0.21492373560829467, "learning_rate": 1.7324228629236215e-05, "loss": 0.2327, "num_tokens": 1154326060.0, "step": 6048 }, { "epoch": 2.064516129032258, "grad_norm": 0.21585133171614782, "learning_rate": 1.7317905918057665e-05, "loss": 0.2293, "num_tokens": 1154532474.0, "step": 6049 }, { "epoch": 2.064857484212323, "grad_norm": 0.21952986669413557, "learning_rate": 1.7311583206879112e-05, "loss": 0.224, "num_tokens": 1154736433.0, "step": 6050 }, { "epoch": 2.0651988393923877, "grad_norm": 0.2279419392612536, "learning_rate": 1.7305260495700556e-05, "loss": 0.2177, "num_tokens": 1154898666.0, "step": 6051 }, { "epoch": 2.0655401945724527, "grad_norm": 0.2629000401523772, "learning_rate": 1.7298937784522003e-05, "loss": 0.2419, "num_tokens": 1155090333.0, "step": 6052 }, { "epoch": 2.0658815497525174, "grad_norm": 0.28225131089499694, "learning_rate": 1.729261507334345e-05, "loss": 0.2112, "num_tokens": 1155250443.0, "step": 6053 }, { "epoch": 2.0662229049325824, "grad_norm": 0.2289680441020458, "learning_rate": 1.7286292362164897e-05, "loss": 0.2301, "num_tokens": 1155438916.0, "step": 6054 }, { "epoch": 2.066564260112647, "grad_norm": 0.22335437021428653, "learning_rate": 1.7279969650986344e-05, "loss": 0.2259, "num_tokens": 1155635175.0, "step": 6055 }, { "epoch": 2.066905615292712, "grad_norm": 0.22761647772675914, "learning_rate": 1.727364693980779e-05, "loss": 0.2307, "num_tokens": 1155852536.0, "step": 6056 }, { "epoch": 2.0672469704727767, "grad_norm": 0.2549155411244269, "learning_rate": 1.7267324228629238e-05, "loss": 0.2284, "num_tokens": 1156022637.0, "step": 6057 }, { "epoch": 2.067588325652842, "grad_norm": 0.21868698917629803, "learning_rate": 1.7261001517450685e-05, "loss": 0.2322, "num_tokens": 1156242456.0, "step": 6058 }, { "epoch": 2.067929680832907, "grad_norm": 0.23251823518603204, "learning_rate": 1.7254678806272132e-05, "loss": 0.2309, "num_tokens": 1156434119.0, "step": 6059 }, { "epoch": 2.0682710360129715, "grad_norm": 0.22850567581527007, "learning_rate": 1.7248356095093576e-05, "loss": 0.2288, "num_tokens": 1156621706.0, "step": 6060 }, { "epoch": 2.0686123911930365, "grad_norm": 0.23003070661614022, "learning_rate": 1.7242033383915023e-05, "loss": 0.2174, "num_tokens": 1156808131.0, "step": 6061 }, { "epoch": 2.068953746373101, "grad_norm": 0.25000049129894225, "learning_rate": 1.7235710672736473e-05, "loss": 0.2418, "num_tokens": 1157007470.0, "step": 6062 }, { "epoch": 2.069295101553166, "grad_norm": 0.25150339975793234, "learning_rate": 1.7229387961557916e-05, "loss": 0.23, "num_tokens": 1157198660.0, "step": 6063 }, { "epoch": 2.069636456733231, "grad_norm": 0.24927711556794305, "learning_rate": 1.7223065250379363e-05, "loss": 0.2235, "num_tokens": 1157384047.0, "step": 6064 }, { "epoch": 2.069977811913296, "grad_norm": 0.2516368279464043, "learning_rate": 1.721674253920081e-05, "loss": 0.249, "num_tokens": 1157571695.0, "step": 6065 }, { "epoch": 2.0703191670933605, "grad_norm": 0.24819911778076603, "learning_rate": 1.7210419828022257e-05, "loss": 0.2209, "num_tokens": 1157744830.0, "step": 6066 }, { "epoch": 2.0706605222734256, "grad_norm": 0.23247507098354642, "learning_rate": 1.7204097116843704e-05, "loss": 0.2484, "num_tokens": 1157953641.0, "step": 6067 }, { "epoch": 2.07100187745349, "grad_norm": 0.23313381236166236, "learning_rate": 1.719777440566515e-05, "loss": 0.2558, "num_tokens": 1158153983.0, "step": 6068 }, { "epoch": 2.0713432326335552, "grad_norm": 0.24620842299596454, "learning_rate": 1.7191451694486595e-05, "loss": 0.2145, "num_tokens": 1158334668.0, "step": 6069 }, { "epoch": 2.07168458781362, "grad_norm": 0.24824869378479594, "learning_rate": 1.7185128983308045e-05, "loss": 0.2246, "num_tokens": 1158492862.0, "step": 6070 }, { "epoch": 2.072025942993685, "grad_norm": 0.23727502306709808, "learning_rate": 1.7178806272129492e-05, "loss": 0.2197, "num_tokens": 1158682966.0, "step": 6071 }, { "epoch": 2.07236729817375, "grad_norm": 0.26718104961298655, "learning_rate": 1.7172483560950936e-05, "loss": 0.2018, "num_tokens": 1158816572.0, "step": 6072 }, { "epoch": 2.0727086533538146, "grad_norm": 0.23264710789368614, "learning_rate": 1.7166160849772383e-05, "loss": 0.2207, "num_tokens": 1158981678.0, "step": 6073 }, { "epoch": 2.0730500085338797, "grad_norm": 0.2147345354293798, "learning_rate": 1.715983813859383e-05, "loss": 0.2565, "num_tokens": 1159234516.0, "step": 6074 }, { "epoch": 2.0733913637139443, "grad_norm": 0.2708596108299199, "learning_rate": 1.7153515427415277e-05, "loss": 0.2257, "num_tokens": 1159398827.0, "step": 6075 }, { "epoch": 2.0737327188940093, "grad_norm": 0.2334013159900587, "learning_rate": 1.7147192716236724e-05, "loss": 0.2435, "num_tokens": 1159606173.0, "step": 6076 }, { "epoch": 2.074074074074074, "grad_norm": 0.25025309720400685, "learning_rate": 1.714087000505817e-05, "loss": 0.2407, "num_tokens": 1159827347.0, "step": 6077 }, { "epoch": 2.074415429254139, "grad_norm": 0.24274229502175806, "learning_rate": 1.7134547293879614e-05, "loss": 0.2199, "num_tokens": 1159979863.0, "step": 6078 }, { "epoch": 2.0747567844342036, "grad_norm": 0.22135296673034338, "learning_rate": 1.7128224582701065e-05, "loss": 0.2256, "num_tokens": 1160179350.0, "step": 6079 }, { "epoch": 2.0750981396142687, "grad_norm": 0.2504303847260293, "learning_rate": 1.7121901871522512e-05, "loss": 0.2396, "num_tokens": 1160381925.0, "step": 6080 }, { "epoch": 2.0754394947943333, "grad_norm": 0.3484845622687106, "learning_rate": 1.7115579160343955e-05, "loss": 0.2309, "num_tokens": 1160579292.0, "step": 6081 }, { "epoch": 2.0757808499743984, "grad_norm": 0.24236573434137046, "learning_rate": 1.7109256449165402e-05, "loss": 0.231, "num_tokens": 1160767272.0, "step": 6082 }, { "epoch": 2.076122205154463, "grad_norm": 0.22742317068327722, "learning_rate": 1.710293373798685e-05, "loss": 0.2264, "num_tokens": 1160966218.0, "step": 6083 }, { "epoch": 2.076463560334528, "grad_norm": 0.2426116765587614, "learning_rate": 1.7096611026808296e-05, "loss": 0.2186, "num_tokens": 1161143706.0, "step": 6084 }, { "epoch": 2.076804915514593, "grad_norm": 0.2560770067386594, "learning_rate": 1.7090288315629743e-05, "loss": 0.2096, "num_tokens": 1161298435.0, "step": 6085 }, { "epoch": 2.0771462706946577, "grad_norm": 0.24324224874107445, "learning_rate": 1.708396560445119e-05, "loss": 0.2534, "num_tokens": 1161500267.0, "step": 6086 }, { "epoch": 2.077487625874723, "grad_norm": 0.24081062035609063, "learning_rate": 1.7077642893272637e-05, "loss": 0.2533, "num_tokens": 1161717972.0, "step": 6087 }, { "epoch": 2.0778289810547874, "grad_norm": 0.25616023693550394, "learning_rate": 1.7071320182094084e-05, "loss": 0.2355, "num_tokens": 1161902856.0, "step": 6088 }, { "epoch": 2.0781703362348525, "grad_norm": 0.24060282148987128, "learning_rate": 1.7064997470915528e-05, "loss": 0.2222, "num_tokens": 1162082946.0, "step": 6089 }, { "epoch": 2.078511691414917, "grad_norm": 0.2468706233781958, "learning_rate": 1.7058674759736975e-05, "loss": 0.224, "num_tokens": 1162270387.0, "step": 6090 }, { "epoch": 2.078853046594982, "grad_norm": 0.2705573060379201, "learning_rate": 1.7052352048558422e-05, "loss": 0.2105, "num_tokens": 1162430980.0, "step": 6091 }, { "epoch": 2.0791944017750468, "grad_norm": 0.24385679949073294, "learning_rate": 1.704602933737987e-05, "loss": 0.2356, "num_tokens": 1162601672.0, "step": 6092 }, { "epoch": 2.079535756955112, "grad_norm": 0.23505262055240247, "learning_rate": 1.7039706626201316e-05, "loss": 0.2309, "num_tokens": 1162791514.0, "step": 6093 }, { "epoch": 2.0798771121351765, "grad_norm": 0.24960719845145374, "learning_rate": 1.7033383915022763e-05, "loss": 0.2525, "num_tokens": 1163006348.0, "step": 6094 }, { "epoch": 2.0802184673152415, "grad_norm": 0.30856372740148974, "learning_rate": 1.7027061203844206e-05, "loss": 0.2406, "num_tokens": 1163203136.0, "step": 6095 }, { "epoch": 2.0805598224953066, "grad_norm": 0.2356608612194541, "learning_rate": 1.7020738492665657e-05, "loss": 0.2248, "num_tokens": 1163391884.0, "step": 6096 }, { "epoch": 2.080901177675371, "grad_norm": 0.2598117689560733, "learning_rate": 1.7014415781487104e-05, "loss": 0.2325, "num_tokens": 1163571623.0, "step": 6097 }, { "epoch": 2.0812425328554363, "grad_norm": 0.21504566960592517, "learning_rate": 1.7008093070308547e-05, "loss": 0.236, "num_tokens": 1163789055.0, "step": 6098 }, { "epoch": 2.081583888035501, "grad_norm": 0.2378022560698843, "learning_rate": 1.7001770359129994e-05, "loss": 0.2059, "num_tokens": 1163966892.0, "step": 6099 }, { "epoch": 2.081925243215566, "grad_norm": 0.2513114778441228, "learning_rate": 1.699544764795144e-05, "loss": 0.2323, "num_tokens": 1164155880.0, "step": 6100 }, { "epoch": 2.0822665983956306, "grad_norm": 0.24000534200922208, "learning_rate": 1.6989124936772888e-05, "loss": 0.2373, "num_tokens": 1164361149.0, "step": 6101 }, { "epoch": 2.0826079535756956, "grad_norm": 0.2389708447880411, "learning_rate": 1.6982802225594335e-05, "loss": 0.2221, "num_tokens": 1164522552.0, "step": 6102 }, { "epoch": 2.0829493087557602, "grad_norm": 0.25408044894756504, "learning_rate": 1.6976479514415782e-05, "loss": 0.2229, "num_tokens": 1164698752.0, "step": 6103 }, { "epoch": 2.0832906639358253, "grad_norm": 0.2451503928663336, "learning_rate": 1.697015680323723e-05, "loss": 0.2239, "num_tokens": 1164878751.0, "step": 6104 }, { "epoch": 2.08363201911589, "grad_norm": 0.2475913981245334, "learning_rate": 1.6963834092058676e-05, "loss": 0.2106, "num_tokens": 1165051997.0, "step": 6105 }, { "epoch": 2.083973374295955, "grad_norm": 0.23880105353781733, "learning_rate": 1.6957511380880123e-05, "loss": 0.2456, "num_tokens": 1165268242.0, "step": 6106 }, { "epoch": 2.0843147294760196, "grad_norm": 0.2267028734822122, "learning_rate": 1.6951188669701567e-05, "loss": 0.219, "num_tokens": 1165471073.0, "step": 6107 }, { "epoch": 2.0846560846560847, "grad_norm": 0.25003030053259856, "learning_rate": 1.6944865958523014e-05, "loss": 0.2323, "num_tokens": 1165684272.0, "step": 6108 }, { "epoch": 2.0849974398361497, "grad_norm": 0.2445200971966855, "learning_rate": 1.6938543247344464e-05, "loss": 0.2131, "num_tokens": 1165860379.0, "step": 6109 }, { "epoch": 2.0853387950162143, "grad_norm": 0.23423396443450892, "learning_rate": 1.6932220536165908e-05, "loss": 0.2603, "num_tokens": 1166071982.0, "step": 6110 }, { "epoch": 2.0856801501962794, "grad_norm": 0.26143595031556527, "learning_rate": 1.6925897824987355e-05, "loss": 0.2297, "num_tokens": 1166250605.0, "step": 6111 }, { "epoch": 2.086021505376344, "grad_norm": 0.256778516802807, "learning_rate": 1.6919575113808802e-05, "loss": 0.2343, "num_tokens": 1166436482.0, "step": 6112 }, { "epoch": 2.086362860556409, "grad_norm": 0.24219967105608134, "learning_rate": 1.691325240263025e-05, "loss": 0.2225, "num_tokens": 1166624160.0, "step": 6113 }, { "epoch": 2.0867042157364737, "grad_norm": 0.25123533115732316, "learning_rate": 1.6906929691451696e-05, "loss": 0.2381, "num_tokens": 1166813684.0, "step": 6114 }, { "epoch": 2.0870455709165388, "grad_norm": 0.2227216081261321, "learning_rate": 1.6900606980273143e-05, "loss": 0.2126, "num_tokens": 1167003248.0, "step": 6115 }, { "epoch": 2.0873869260966034, "grad_norm": 0.2489635333699625, "learning_rate": 1.6894284269094586e-05, "loss": 0.237, "num_tokens": 1167215326.0, "step": 6116 }, { "epoch": 2.0877282812766684, "grad_norm": 0.2459491430078413, "learning_rate": 1.6887961557916037e-05, "loss": 0.2382, "num_tokens": 1167430617.0, "step": 6117 }, { "epoch": 2.088069636456733, "grad_norm": 0.24396606073602595, "learning_rate": 1.6881638846737484e-05, "loss": 0.2216, "num_tokens": 1167604798.0, "step": 6118 }, { "epoch": 2.088410991636798, "grad_norm": 0.28396219895962105, "learning_rate": 1.6875316135558927e-05, "loss": 0.1998, "num_tokens": 1167769416.0, "step": 6119 }, { "epoch": 2.0887523468168627, "grad_norm": 0.25351194456732806, "learning_rate": 1.6868993424380374e-05, "loss": 0.2466, "num_tokens": 1167952842.0, "step": 6120 }, { "epoch": 2.089093701996928, "grad_norm": 0.25625063077634624, "learning_rate": 1.686267071320182e-05, "loss": 0.2127, "num_tokens": 1168111103.0, "step": 6121 }, { "epoch": 2.089435057176993, "grad_norm": 0.230861776833573, "learning_rate": 1.6856348002023268e-05, "loss": 0.2187, "num_tokens": 1168313153.0, "step": 6122 }, { "epoch": 2.0897764123570575, "grad_norm": 0.2661616662147933, "learning_rate": 1.6850025290844715e-05, "loss": 0.248, "num_tokens": 1168492522.0, "step": 6123 }, { "epoch": 2.0901177675371225, "grad_norm": 0.25186234551391923, "learning_rate": 1.6843702579666162e-05, "loss": 0.2129, "num_tokens": 1168663511.0, "step": 6124 }, { "epoch": 2.090459122717187, "grad_norm": 0.22590473792381127, "learning_rate": 1.6837379868487606e-05, "loss": 0.2052, "num_tokens": 1168867599.0, "step": 6125 }, { "epoch": 2.090800477897252, "grad_norm": 0.2500241178491433, "learning_rate": 1.6831057157309056e-05, "loss": 0.2506, "num_tokens": 1169081269.0, "step": 6126 }, { "epoch": 2.091141833077317, "grad_norm": 0.2731155972281338, "learning_rate": 1.6824734446130503e-05, "loss": 0.2485, "num_tokens": 1169284439.0, "step": 6127 }, { "epoch": 2.091483188257382, "grad_norm": 0.23820798008710906, "learning_rate": 1.6818411734951947e-05, "loss": 0.2134, "num_tokens": 1169481944.0, "step": 6128 }, { "epoch": 2.0918245434374465, "grad_norm": 0.2528813213204708, "learning_rate": 1.6812089023773394e-05, "loss": 0.2193, "num_tokens": 1169649356.0, "step": 6129 }, { "epoch": 2.0921658986175116, "grad_norm": 0.2426060238032, "learning_rate": 1.680576631259484e-05, "loss": 0.235, "num_tokens": 1169848377.0, "step": 6130 }, { "epoch": 2.092507253797576, "grad_norm": 0.251020670427932, "learning_rate": 1.6799443601416288e-05, "loss": 0.2085, "num_tokens": 1170015798.0, "step": 6131 }, { "epoch": 2.0928486089776412, "grad_norm": 0.274767690072198, "learning_rate": 1.6793120890237735e-05, "loss": 0.2226, "num_tokens": 1170192224.0, "step": 6132 }, { "epoch": 2.0931899641577063, "grad_norm": 0.2370720941209939, "learning_rate": 1.678679817905918e-05, "loss": 0.2464, "num_tokens": 1170364106.0, "step": 6133 }, { "epoch": 2.093531319337771, "grad_norm": 0.23499289556533848, "learning_rate": 1.678047546788063e-05, "loss": 0.2347, "num_tokens": 1170575263.0, "step": 6134 }, { "epoch": 2.093872674517836, "grad_norm": 0.23751619803872773, "learning_rate": 1.6774152756702076e-05, "loss": 0.2227, "num_tokens": 1170769799.0, "step": 6135 }, { "epoch": 2.0942140296979006, "grad_norm": 0.25436922296816866, "learning_rate": 1.6767830045523523e-05, "loss": 0.2329, "num_tokens": 1170948757.0, "step": 6136 }, { "epoch": 2.0945553848779657, "grad_norm": 0.2691532658845532, "learning_rate": 1.6761507334344966e-05, "loss": 0.2504, "num_tokens": 1171143354.0, "step": 6137 }, { "epoch": 2.0948967400580303, "grad_norm": 0.25662305402578284, "learning_rate": 1.6755184623166413e-05, "loss": 0.2229, "num_tokens": 1171302854.0, "step": 6138 }, { "epoch": 2.0952380952380953, "grad_norm": 0.2474920874577366, "learning_rate": 1.6748861911987863e-05, "loss": 0.2152, "num_tokens": 1171475879.0, "step": 6139 }, { "epoch": 2.09557945041816, "grad_norm": 0.2377879203842615, "learning_rate": 1.6742539200809307e-05, "loss": 0.2362, "num_tokens": 1171692360.0, "step": 6140 }, { "epoch": 2.095920805598225, "grad_norm": 0.23336389301495677, "learning_rate": 1.6736216489630754e-05, "loss": 0.2361, "num_tokens": 1171900554.0, "step": 6141 }, { "epoch": 2.0962621607782896, "grad_norm": 0.22920374196730328, "learning_rate": 1.67298937784522e-05, "loss": 0.2357, "num_tokens": 1172094029.0, "step": 6142 }, { "epoch": 2.0966035159583547, "grad_norm": 0.25913622706276496, "learning_rate": 1.6723571067273648e-05, "loss": 0.2707, "num_tokens": 1172289952.0, "step": 6143 }, { "epoch": 2.0969448711384193, "grad_norm": 0.2380949491060173, "learning_rate": 1.6717248356095095e-05, "loss": 0.2317, "num_tokens": 1172492369.0, "step": 6144 }, { "epoch": 2.0972862263184844, "grad_norm": 0.2468103145422355, "learning_rate": 1.6710925644916542e-05, "loss": 0.2025, "num_tokens": 1172645229.0, "step": 6145 }, { "epoch": 2.0976275814985494, "grad_norm": 0.22159712357502592, "learning_rate": 1.6704602933737986e-05, "loss": 0.2474, "num_tokens": 1172869091.0, "step": 6146 }, { "epoch": 2.097968936678614, "grad_norm": 0.24337312547211007, "learning_rate": 1.6698280222559436e-05, "loss": 0.2107, "num_tokens": 1173045279.0, "step": 6147 }, { "epoch": 2.098310291858679, "grad_norm": 0.24772795677995177, "learning_rate": 1.6691957511380883e-05, "loss": 0.2274, "num_tokens": 1173253343.0, "step": 6148 }, { "epoch": 2.0986516470387437, "grad_norm": 0.22821005189548182, "learning_rate": 1.6685634800202327e-05, "loss": 0.2378, "num_tokens": 1173459041.0, "step": 6149 }, { "epoch": 2.098993002218809, "grad_norm": 0.23063673395472592, "learning_rate": 1.6679312089023773e-05, "loss": 0.2036, "num_tokens": 1173632845.0, "step": 6150 }, { "epoch": 2.0993343573988734, "grad_norm": 0.24308153135829189, "learning_rate": 1.667298937784522e-05, "loss": 0.2478, "num_tokens": 1173846209.0, "step": 6151 }, { "epoch": 2.0996757125789385, "grad_norm": 0.23763385851469154, "learning_rate": 1.6666666666666667e-05, "loss": 0.2388, "num_tokens": 1174060303.0, "step": 6152 }, { "epoch": 2.100017067759003, "grad_norm": 0.2691207275771645, "learning_rate": 1.6660343955488114e-05, "loss": 0.2293, "num_tokens": 1174211080.0, "step": 6153 }, { "epoch": 2.100358422939068, "grad_norm": 0.2442904493462471, "learning_rate": 1.665402124430956e-05, "loss": 0.2594, "num_tokens": 1174422732.0, "step": 6154 }, { "epoch": 2.1006997781191328, "grad_norm": 0.23260350719539197, "learning_rate": 1.6647698533131005e-05, "loss": 0.2187, "num_tokens": 1174628110.0, "step": 6155 }, { "epoch": 2.101041133299198, "grad_norm": 0.26001492483389743, "learning_rate": 1.6641375821952455e-05, "loss": 0.2222, "num_tokens": 1174772768.0, "step": 6156 }, { "epoch": 2.1013824884792625, "grad_norm": 0.2464120111899009, "learning_rate": 1.6635053110773902e-05, "loss": 0.2469, "num_tokens": 1174998289.0, "step": 6157 }, { "epoch": 2.1017238436593275, "grad_norm": 0.2505701657861731, "learning_rate": 1.6628730399595346e-05, "loss": 0.2247, "num_tokens": 1175192864.0, "step": 6158 }, { "epoch": 2.1020651988393926, "grad_norm": 0.264400644614151, "learning_rate": 1.6622407688416793e-05, "loss": 0.2032, "num_tokens": 1175347900.0, "step": 6159 }, { "epoch": 2.102406554019457, "grad_norm": 0.23023083447255577, "learning_rate": 1.661608497723824e-05, "loss": 0.2297, "num_tokens": 1175555997.0, "step": 6160 }, { "epoch": 2.1027479091995223, "grad_norm": 0.22322486676133982, "learning_rate": 1.6609762266059687e-05, "loss": 0.2415, "num_tokens": 1175767680.0, "step": 6161 }, { "epoch": 2.103089264379587, "grad_norm": 0.2409176934428512, "learning_rate": 1.6603439554881134e-05, "loss": 0.2273, "num_tokens": 1175955623.0, "step": 6162 }, { "epoch": 2.103430619559652, "grad_norm": 0.2285091663764813, "learning_rate": 1.659711684370258e-05, "loss": 0.2164, "num_tokens": 1176157498.0, "step": 6163 }, { "epoch": 2.1037719747397166, "grad_norm": 0.27297270546799496, "learning_rate": 1.6590794132524028e-05, "loss": 0.2321, "num_tokens": 1176314690.0, "step": 6164 }, { "epoch": 2.1041133299197816, "grad_norm": 0.24952322055150972, "learning_rate": 1.6584471421345475e-05, "loss": 0.2307, "num_tokens": 1176519869.0, "step": 6165 }, { "epoch": 2.1044546850998462, "grad_norm": 0.25661753617993494, "learning_rate": 1.6578148710166922e-05, "loss": 0.2437, "num_tokens": 1176706470.0, "step": 6166 }, { "epoch": 2.1047960402799113, "grad_norm": 0.24357674231366283, "learning_rate": 1.6571825998988365e-05, "loss": 0.2253, "num_tokens": 1176892662.0, "step": 6167 }, { "epoch": 2.105137395459976, "grad_norm": 0.24181912422748808, "learning_rate": 1.6565503287809812e-05, "loss": 0.2132, "num_tokens": 1177074365.0, "step": 6168 }, { "epoch": 2.105478750640041, "grad_norm": 0.24720096738717146, "learning_rate": 1.6559180576631263e-05, "loss": 0.2221, "num_tokens": 1177256578.0, "step": 6169 }, { "epoch": 2.105820105820106, "grad_norm": 0.22102417877364153, "learning_rate": 1.6552857865452706e-05, "loss": 0.228, "num_tokens": 1177448211.0, "step": 6170 }, { "epoch": 2.1061614610001707, "grad_norm": 0.25223181601602374, "learning_rate": 1.6546535154274153e-05, "loss": 0.2475, "num_tokens": 1177621885.0, "step": 6171 }, { "epoch": 2.1065028161802357, "grad_norm": 0.2533434687222597, "learning_rate": 1.65402124430956e-05, "loss": 0.219, "num_tokens": 1177799135.0, "step": 6172 }, { "epoch": 2.1068441713603003, "grad_norm": 0.2228320711066175, "learning_rate": 1.6533889731917047e-05, "loss": 0.2292, "num_tokens": 1178029427.0, "step": 6173 }, { "epoch": 2.1071855265403654, "grad_norm": 0.23695763681358561, "learning_rate": 1.6527567020738494e-05, "loss": 0.2261, "num_tokens": 1178224737.0, "step": 6174 }, { "epoch": 2.10752688172043, "grad_norm": 0.24993222266820556, "learning_rate": 1.652124430955994e-05, "loss": 0.2072, "num_tokens": 1178386114.0, "step": 6175 }, { "epoch": 2.107868236900495, "grad_norm": 0.24919616956149832, "learning_rate": 1.6514921598381385e-05, "loss": 0.2185, "num_tokens": 1178554869.0, "step": 6176 }, { "epoch": 2.1082095920805597, "grad_norm": 0.2428907800714949, "learning_rate": 1.6508598887202832e-05, "loss": 0.2424, "num_tokens": 1178757192.0, "step": 6177 }, { "epoch": 2.1085509472606248, "grad_norm": 0.227016935511184, "learning_rate": 1.6502276176024282e-05, "loss": 0.2274, "num_tokens": 1178974710.0, "step": 6178 }, { "epoch": 2.1088923024406894, "grad_norm": 0.23032586247756617, "learning_rate": 1.6495953464845726e-05, "loss": 0.2446, "num_tokens": 1179164720.0, "step": 6179 }, { "epoch": 2.1092336576207544, "grad_norm": 0.24305204654086326, "learning_rate": 1.6489630753667173e-05, "loss": 0.2342, "num_tokens": 1179363619.0, "step": 6180 }, { "epoch": 2.109575012800819, "grad_norm": 0.25939775020716344, "learning_rate": 1.648330804248862e-05, "loss": 0.2123, "num_tokens": 1179521728.0, "step": 6181 }, { "epoch": 2.109916367980884, "grad_norm": 0.2659395720016433, "learning_rate": 1.6476985331310067e-05, "loss": 0.2388, "num_tokens": 1179701431.0, "step": 6182 }, { "epoch": 2.110257723160949, "grad_norm": 0.21953047266278583, "learning_rate": 1.6470662620131514e-05, "loss": 0.219, "num_tokens": 1179909872.0, "step": 6183 }, { "epoch": 2.110599078341014, "grad_norm": 0.2210887308918335, "learning_rate": 1.646433990895296e-05, "loss": 0.2275, "num_tokens": 1180134180.0, "step": 6184 }, { "epoch": 2.110940433521079, "grad_norm": 0.24522420934467543, "learning_rate": 1.6458017197774404e-05, "loss": 0.233, "num_tokens": 1180317254.0, "step": 6185 }, { "epoch": 2.1112817887011435, "grad_norm": 0.23051988479499433, "learning_rate": 1.6451694486595855e-05, "loss": 0.2344, "num_tokens": 1180527082.0, "step": 6186 }, { "epoch": 2.1116231438812085, "grad_norm": 0.24264557276472412, "learning_rate": 1.6445371775417302e-05, "loss": 0.2483, "num_tokens": 1180727469.0, "step": 6187 }, { "epoch": 2.111964499061273, "grad_norm": 0.23023635421185024, "learning_rate": 1.6439049064238745e-05, "loss": 0.2358, "num_tokens": 1180949003.0, "step": 6188 }, { "epoch": 2.112305854241338, "grad_norm": 0.25174005092284646, "learning_rate": 1.6432726353060192e-05, "loss": 0.1994, "num_tokens": 1181095986.0, "step": 6189 }, { "epoch": 2.112647209421403, "grad_norm": 0.2683579851123194, "learning_rate": 1.642640364188164e-05, "loss": 0.2434, "num_tokens": 1181289875.0, "step": 6190 }, { "epoch": 2.112988564601468, "grad_norm": 0.24083528782811836, "learning_rate": 1.6420080930703086e-05, "loss": 0.2767, "num_tokens": 1181478188.0, "step": 6191 }, { "epoch": 2.1133299197815325, "grad_norm": 0.22364233273779502, "learning_rate": 1.6413758219524533e-05, "loss": 0.2294, "num_tokens": 1181702645.0, "step": 6192 }, { "epoch": 2.1136712749615976, "grad_norm": 0.24110666322697188, "learning_rate": 1.640743550834598e-05, "loss": 0.2001, "num_tokens": 1181883844.0, "step": 6193 }, { "epoch": 2.114012630141662, "grad_norm": 0.25181348933550274, "learning_rate": 1.6401112797167427e-05, "loss": 0.249, "num_tokens": 1182079448.0, "step": 6194 }, { "epoch": 2.1143539853217272, "grad_norm": 0.2509528363684999, "learning_rate": 1.6394790085988874e-05, "loss": 0.2351, "num_tokens": 1182242510.0, "step": 6195 }, { "epoch": 2.1146953405017923, "grad_norm": 0.25783775383152524, "learning_rate": 1.638846737481032e-05, "loss": 0.2296, "num_tokens": 1182427739.0, "step": 6196 }, { "epoch": 2.115036695681857, "grad_norm": 0.2275807592939975, "learning_rate": 1.6382144663631765e-05, "loss": 0.2243, "num_tokens": 1182635578.0, "step": 6197 }, { "epoch": 2.115378050861922, "grad_norm": 0.2306731491111555, "learning_rate": 1.6375821952453212e-05, "loss": 0.2276, "num_tokens": 1182844703.0, "step": 6198 }, { "epoch": 2.1157194060419866, "grad_norm": 0.24983872039572091, "learning_rate": 1.6369499241274662e-05, "loss": 0.2331, "num_tokens": 1183021983.0, "step": 6199 }, { "epoch": 2.1160607612220517, "grad_norm": 0.2691062359686062, "learning_rate": 1.6363176530096106e-05, "loss": 0.2309, "num_tokens": 1183220711.0, "step": 6200 }, { "epoch": 2.1164021164021163, "grad_norm": 0.24920658833547385, "learning_rate": 1.6356853818917553e-05, "loss": 0.2211, "num_tokens": 1183375228.0, "step": 6201 }, { "epoch": 2.1167434715821813, "grad_norm": 0.264052517143862, "learning_rate": 1.6350531107739e-05, "loss": 0.2387, "num_tokens": 1183577313.0, "step": 6202 }, { "epoch": 2.117084826762246, "grad_norm": 0.2684990942168881, "learning_rate": 1.6344208396560447e-05, "loss": 0.2556, "num_tokens": 1183750017.0, "step": 6203 }, { "epoch": 2.117426181942311, "grad_norm": 0.24562355077503464, "learning_rate": 1.6337885685381894e-05, "loss": 0.2358, "num_tokens": 1183922917.0, "step": 6204 }, { "epoch": 2.1177675371223756, "grad_norm": 0.23974500234151178, "learning_rate": 1.633156297420334e-05, "loss": 0.2378, "num_tokens": 1184126797.0, "step": 6205 }, { "epoch": 2.1181088923024407, "grad_norm": 0.24365351543554814, "learning_rate": 1.6325240263024784e-05, "loss": 0.2285, "num_tokens": 1184292197.0, "step": 6206 }, { "epoch": 2.1184502474825058, "grad_norm": 0.25920165983491883, "learning_rate": 1.631891755184623e-05, "loss": 0.2485, "num_tokens": 1184483569.0, "step": 6207 }, { "epoch": 2.1187916026625704, "grad_norm": 0.23704810308317947, "learning_rate": 1.631259484066768e-05, "loss": 0.2623, "num_tokens": 1184702312.0, "step": 6208 }, { "epoch": 2.1191329578426354, "grad_norm": 0.2734677088571662, "learning_rate": 1.6306272129489125e-05, "loss": 0.2394, "num_tokens": 1184900794.0, "step": 6209 }, { "epoch": 2.1194743130227, "grad_norm": 0.25541544265740856, "learning_rate": 1.6299949418310572e-05, "loss": 0.2453, "num_tokens": 1185081956.0, "step": 6210 }, { "epoch": 2.119815668202765, "grad_norm": 0.25705413791118487, "learning_rate": 1.629362670713202e-05, "loss": 0.2473, "num_tokens": 1185248625.0, "step": 6211 }, { "epoch": 2.1201570233828297, "grad_norm": 0.2620105496260393, "learning_rate": 1.6287303995953466e-05, "loss": 0.2275, "num_tokens": 1185426885.0, "step": 6212 }, { "epoch": 2.120498378562895, "grad_norm": 0.2556908085842101, "learning_rate": 1.6280981284774913e-05, "loss": 0.2658, "num_tokens": 1185626009.0, "step": 6213 }, { "epoch": 2.1208397337429594, "grad_norm": 0.25373377844706335, "learning_rate": 1.627465857359636e-05, "loss": 0.2351, "num_tokens": 1185794490.0, "step": 6214 }, { "epoch": 2.1211810889230245, "grad_norm": 0.24618553737245347, "learning_rate": 1.6268335862417804e-05, "loss": 0.2052, "num_tokens": 1185972024.0, "step": 6215 }, { "epoch": 2.121522444103089, "grad_norm": 0.26901700468067674, "learning_rate": 1.6262013151239254e-05, "loss": 0.2262, "num_tokens": 1186147989.0, "step": 6216 }, { "epoch": 2.121863799283154, "grad_norm": 0.23426090608003833, "learning_rate": 1.62556904400607e-05, "loss": 0.2447, "num_tokens": 1186354064.0, "step": 6217 }, { "epoch": 2.122205154463219, "grad_norm": 0.2500828612475091, "learning_rate": 1.6249367728882145e-05, "loss": 0.237, "num_tokens": 1186558130.0, "step": 6218 }, { "epoch": 2.122546509643284, "grad_norm": 0.23703392427350709, "learning_rate": 1.624304501770359e-05, "loss": 0.2317, "num_tokens": 1186749988.0, "step": 6219 }, { "epoch": 2.122887864823349, "grad_norm": 0.24370810062601106, "learning_rate": 1.623672230652504e-05, "loss": 0.2574, "num_tokens": 1186974853.0, "step": 6220 }, { "epoch": 2.1232292200034135, "grad_norm": 0.230168435489899, "learning_rate": 1.6230399595346486e-05, "loss": 0.2359, "num_tokens": 1187156821.0, "step": 6221 }, { "epoch": 2.1235705751834786, "grad_norm": 0.23694223083005023, "learning_rate": 1.6224076884167933e-05, "loss": 0.2314, "num_tokens": 1187351683.0, "step": 6222 }, { "epoch": 2.123911930363543, "grad_norm": 0.26150042712168803, "learning_rate": 1.6217754172989376e-05, "loss": 0.1793, "num_tokens": 1187481223.0, "step": 6223 }, { "epoch": 2.1242532855436083, "grad_norm": 0.2286306074886366, "learning_rate": 1.6211431461810827e-05, "loss": 0.2295, "num_tokens": 1187675163.0, "step": 6224 }, { "epoch": 2.124594640723673, "grad_norm": 0.22837424059710273, "learning_rate": 1.6205108750632274e-05, "loss": 0.2338, "num_tokens": 1187873859.0, "step": 6225 }, { "epoch": 2.124935995903738, "grad_norm": 0.24345978490414, "learning_rate": 1.6198786039453717e-05, "loss": 0.2398, "num_tokens": 1188062792.0, "step": 6226 }, { "epoch": 2.1252773510838026, "grad_norm": 0.2356761923595149, "learning_rate": 1.6192463328275164e-05, "loss": 0.2144, "num_tokens": 1188262000.0, "step": 6227 }, { "epoch": 2.1256187062638676, "grad_norm": 0.2286021625562686, "learning_rate": 1.618614061709661e-05, "loss": 0.2063, "num_tokens": 1188422502.0, "step": 6228 }, { "epoch": 2.1259600614439322, "grad_norm": 0.23428860131325813, "learning_rate": 1.6179817905918058e-05, "loss": 0.2034, "num_tokens": 1188583056.0, "step": 6229 }, { "epoch": 2.1263014166239973, "grad_norm": 0.27372910676816437, "learning_rate": 1.6173495194739505e-05, "loss": 0.2335, "num_tokens": 1188755518.0, "step": 6230 }, { "epoch": 2.126642771804062, "grad_norm": 0.24237712225397257, "learning_rate": 1.6167172483560952e-05, "loss": 0.2032, "num_tokens": 1188921555.0, "step": 6231 }, { "epoch": 2.126984126984127, "grad_norm": 0.2587257394073563, "learning_rate": 1.6160849772382396e-05, "loss": 0.22, "num_tokens": 1189101435.0, "step": 6232 }, { "epoch": 2.127325482164192, "grad_norm": 0.23214556777200088, "learning_rate": 1.6154527061203846e-05, "loss": 0.2328, "num_tokens": 1189306366.0, "step": 6233 }, { "epoch": 2.1276668373442567, "grad_norm": 0.26073729272724105, "learning_rate": 1.6148204350025293e-05, "loss": 0.2252, "num_tokens": 1189478303.0, "step": 6234 }, { "epoch": 2.1280081925243217, "grad_norm": 0.25549989046562627, "learning_rate": 1.6141881638846737e-05, "loss": 0.2065, "num_tokens": 1189645431.0, "step": 6235 }, { "epoch": 2.1283495477043863, "grad_norm": 0.23940881202364217, "learning_rate": 1.6135558927668184e-05, "loss": 0.2462, "num_tokens": 1189866701.0, "step": 6236 }, { "epoch": 2.1286909028844514, "grad_norm": 0.23205638667504644, "learning_rate": 1.612923621648963e-05, "loss": 0.2663, "num_tokens": 1190079772.0, "step": 6237 }, { "epoch": 2.129032258064516, "grad_norm": 0.24709646930637985, "learning_rate": 1.6122913505311078e-05, "loss": 0.2486, "num_tokens": 1190281881.0, "step": 6238 }, { "epoch": 2.129373613244581, "grad_norm": 0.23107355980286823, "learning_rate": 1.6116590794132524e-05, "loss": 0.2523, "num_tokens": 1190521470.0, "step": 6239 }, { "epoch": 2.1297149684246457, "grad_norm": 0.24074016962697037, "learning_rate": 1.611026808295397e-05, "loss": 0.236, "num_tokens": 1190697106.0, "step": 6240 }, { "epoch": 2.1300563236047108, "grad_norm": 0.24344224149083027, "learning_rate": 1.610394537177542e-05, "loss": 0.2173, "num_tokens": 1190863152.0, "step": 6241 }, { "epoch": 2.1303976787847754, "grad_norm": 0.2550449721864772, "learning_rate": 1.6097622660596865e-05, "loss": 0.1962, "num_tokens": 1191012819.0, "step": 6242 }, { "epoch": 2.1307390339648404, "grad_norm": 0.2345435851513415, "learning_rate": 1.6091299949418312e-05, "loss": 0.2372, "num_tokens": 1191229684.0, "step": 6243 }, { "epoch": 2.1310803891449055, "grad_norm": 0.235913707720192, "learning_rate": 1.6084977238239756e-05, "loss": 0.2273, "num_tokens": 1191445391.0, "step": 6244 }, { "epoch": 2.13142174432497, "grad_norm": 0.22261884092296616, "learning_rate": 1.6078654527061203e-05, "loss": 0.2066, "num_tokens": 1191623131.0, "step": 6245 }, { "epoch": 2.131763099505035, "grad_norm": 0.24267275699887605, "learning_rate": 1.6072331815882653e-05, "loss": 0.2351, "num_tokens": 1191812474.0, "step": 6246 }, { "epoch": 2.1321044546851, "grad_norm": 0.39702930805415587, "learning_rate": 1.6066009104704097e-05, "loss": 0.2315, "num_tokens": 1192001486.0, "step": 6247 }, { "epoch": 2.132445809865165, "grad_norm": 0.22589697865147815, "learning_rate": 1.6059686393525544e-05, "loss": 0.2498, "num_tokens": 1192213129.0, "step": 6248 }, { "epoch": 2.1327871650452295, "grad_norm": 0.2136164194959399, "learning_rate": 1.605336368234699e-05, "loss": 0.2195, "num_tokens": 1192395062.0, "step": 6249 }, { "epoch": 2.1331285202252945, "grad_norm": 0.23793437647402793, "learning_rate": 1.6047040971168438e-05, "loss": 0.223, "num_tokens": 1192570229.0, "step": 6250 }, { "epoch": 2.133469875405359, "grad_norm": 0.24491912543469666, "learning_rate": 1.6040718259989885e-05, "loss": 0.2203, "num_tokens": 1192745179.0, "step": 6251 }, { "epoch": 2.133811230585424, "grad_norm": 0.2501574574896612, "learning_rate": 1.6034395548811332e-05, "loss": 0.2311, "num_tokens": 1192916875.0, "step": 6252 }, { "epoch": 2.134152585765489, "grad_norm": 0.25577802085824547, "learning_rate": 1.6028072837632775e-05, "loss": 0.2242, "num_tokens": 1193091548.0, "step": 6253 }, { "epoch": 2.134493940945554, "grad_norm": 0.2500520437715548, "learning_rate": 1.6021750126454222e-05, "loss": 0.2195, "num_tokens": 1193258101.0, "step": 6254 }, { "epoch": 2.1348352961256185, "grad_norm": 0.24424190187151626, "learning_rate": 1.6015427415275673e-05, "loss": 0.2334, "num_tokens": 1193456685.0, "step": 6255 }, { "epoch": 2.1351766513056836, "grad_norm": 0.23517197580014043, "learning_rate": 1.6009104704097116e-05, "loss": 0.2132, "num_tokens": 1193640515.0, "step": 6256 }, { "epoch": 2.135518006485748, "grad_norm": 0.23929609324598106, "learning_rate": 1.6002781992918563e-05, "loss": 0.2268, "num_tokens": 1193828700.0, "step": 6257 }, { "epoch": 2.1358593616658132, "grad_norm": 0.24109997534610134, "learning_rate": 1.599645928174001e-05, "loss": 0.229, "num_tokens": 1194014304.0, "step": 6258 }, { "epoch": 2.1362007168458783, "grad_norm": 0.2239088381904376, "learning_rate": 1.5990136570561457e-05, "loss": 0.2225, "num_tokens": 1194225527.0, "step": 6259 }, { "epoch": 2.136542072025943, "grad_norm": 0.19759218271190807, "learning_rate": 1.5983813859382904e-05, "loss": 0.225, "num_tokens": 1194454129.0, "step": 6260 }, { "epoch": 2.136883427206008, "grad_norm": 0.24811165815255506, "learning_rate": 1.597749114820435e-05, "loss": 0.2327, "num_tokens": 1194628949.0, "step": 6261 }, { "epoch": 2.1372247823860726, "grad_norm": 0.2732298922989891, "learning_rate": 1.5971168437025795e-05, "loss": 0.23, "num_tokens": 1194813481.0, "step": 6262 }, { "epoch": 2.1375661375661377, "grad_norm": 0.24600750389841558, "learning_rate": 1.5964845725847245e-05, "loss": 0.2187, "num_tokens": 1194983565.0, "step": 6263 }, { "epoch": 2.1379074927462023, "grad_norm": 0.24212350909485744, "learning_rate": 1.5958523014668692e-05, "loss": 0.2095, "num_tokens": 1195154343.0, "step": 6264 }, { "epoch": 2.1382488479262673, "grad_norm": 0.23115090231439475, "learning_rate": 1.5952200303490136e-05, "loss": 0.2338, "num_tokens": 1195350387.0, "step": 6265 }, { "epoch": 2.138590203106332, "grad_norm": 0.22461479544978405, "learning_rate": 1.5945877592311583e-05, "loss": 0.226, "num_tokens": 1195545200.0, "step": 6266 }, { "epoch": 2.138931558286397, "grad_norm": 0.23538614128219712, "learning_rate": 1.593955488113303e-05, "loss": 0.2415, "num_tokens": 1195760288.0, "step": 6267 }, { "epoch": 2.1392729134664616, "grad_norm": 0.2583855795853475, "learning_rate": 1.5933232169954477e-05, "loss": 0.2415, "num_tokens": 1195947585.0, "step": 6268 }, { "epoch": 2.1396142686465267, "grad_norm": 0.23645343567607577, "learning_rate": 1.5926909458775924e-05, "loss": 0.2405, "num_tokens": 1196144534.0, "step": 6269 }, { "epoch": 2.1399556238265918, "grad_norm": 0.22760710465434766, "learning_rate": 1.592058674759737e-05, "loss": 0.2434, "num_tokens": 1196374265.0, "step": 6270 }, { "epoch": 2.1402969790066564, "grad_norm": 0.22502866999436336, "learning_rate": 1.5914264036418818e-05, "loss": 0.2272, "num_tokens": 1196584294.0, "step": 6271 }, { "epoch": 2.1406383341867214, "grad_norm": 0.2281881492863669, "learning_rate": 1.5907941325240265e-05, "loss": 0.2205, "num_tokens": 1196793487.0, "step": 6272 }, { "epoch": 2.140979689366786, "grad_norm": 0.2379786490324534, "learning_rate": 1.5901618614061712e-05, "loss": 0.2072, "num_tokens": 1196957785.0, "step": 6273 }, { "epoch": 2.141321044546851, "grad_norm": 0.2571733705093986, "learning_rate": 1.5895295902883155e-05, "loss": 0.2316, "num_tokens": 1197126872.0, "step": 6274 }, { "epoch": 2.1416623997269157, "grad_norm": 0.24976901207193333, "learning_rate": 1.5888973191704602e-05, "loss": 0.2377, "num_tokens": 1197303820.0, "step": 6275 }, { "epoch": 2.142003754906981, "grad_norm": 0.2743585111749736, "learning_rate": 1.5882650480526053e-05, "loss": 0.2131, "num_tokens": 1197466770.0, "step": 6276 }, { "epoch": 2.1423451100870454, "grad_norm": 0.22478413916824164, "learning_rate": 1.5876327769347496e-05, "loss": 0.2646, "num_tokens": 1197696990.0, "step": 6277 }, { "epoch": 2.1426864652671105, "grad_norm": 0.23099307741608663, "learning_rate": 1.5870005058168943e-05, "loss": 0.2315, "num_tokens": 1197900560.0, "step": 6278 }, { "epoch": 2.143027820447175, "grad_norm": 0.23624864871553866, "learning_rate": 1.586368234699039e-05, "loss": 0.2232, "num_tokens": 1198105292.0, "step": 6279 }, { "epoch": 2.14336917562724, "grad_norm": 0.22615915801885672, "learning_rate": 1.5857359635811837e-05, "loss": 0.2422, "num_tokens": 1198334037.0, "step": 6280 }, { "epoch": 2.1437105308073052, "grad_norm": 0.2296676736311015, "learning_rate": 1.5851036924633284e-05, "loss": 0.2298, "num_tokens": 1198541397.0, "step": 6281 }, { "epoch": 2.14405188598737, "grad_norm": 0.23892167644615486, "learning_rate": 1.584471421345473e-05, "loss": 0.2359, "num_tokens": 1198758087.0, "step": 6282 }, { "epoch": 2.144393241167435, "grad_norm": 0.24843438470030704, "learning_rate": 1.5838391502276175e-05, "loss": 0.24, "num_tokens": 1198941048.0, "step": 6283 }, { "epoch": 2.1447345963474995, "grad_norm": 0.2628073096275312, "learning_rate": 1.5832068791097622e-05, "loss": 0.2411, "num_tokens": 1199096374.0, "step": 6284 }, { "epoch": 2.1450759515275646, "grad_norm": 0.22206828819416177, "learning_rate": 1.5825746079919072e-05, "loss": 0.2111, "num_tokens": 1199278532.0, "step": 6285 }, { "epoch": 2.145417306707629, "grad_norm": 0.253565704377255, "learning_rate": 1.5819423368740516e-05, "loss": 0.239, "num_tokens": 1199463932.0, "step": 6286 }, { "epoch": 2.1457586618876943, "grad_norm": 0.2684089625656171, "learning_rate": 1.5813100657561963e-05, "loss": 0.2207, "num_tokens": 1199626615.0, "step": 6287 }, { "epoch": 2.146100017067759, "grad_norm": 0.23310750517498696, "learning_rate": 1.580677794638341e-05, "loss": 0.211, "num_tokens": 1199811566.0, "step": 6288 }, { "epoch": 2.146441372247824, "grad_norm": 0.24092309726052238, "learning_rate": 1.5800455235204857e-05, "loss": 0.2358, "num_tokens": 1200010761.0, "step": 6289 }, { "epoch": 2.1467827274278886, "grad_norm": 0.21776410678419167, "learning_rate": 1.5794132524026304e-05, "loss": 0.2294, "num_tokens": 1200228633.0, "step": 6290 }, { "epoch": 2.1471240826079536, "grad_norm": 0.2599275928056117, "learning_rate": 1.578780981284775e-05, "loss": 0.2235, "num_tokens": 1200397194.0, "step": 6291 }, { "epoch": 2.1474654377880182, "grad_norm": 0.2655804946018665, "learning_rate": 1.5781487101669194e-05, "loss": 0.2539, "num_tokens": 1200593483.0, "step": 6292 }, { "epoch": 2.1478067929680833, "grad_norm": 0.26965935594907636, "learning_rate": 1.5775164390490645e-05, "loss": 0.2293, "num_tokens": 1200746336.0, "step": 6293 }, { "epoch": 2.148148148148148, "grad_norm": 0.23479522648242926, "learning_rate": 1.576884167931209e-05, "loss": 0.25, "num_tokens": 1200982800.0, "step": 6294 }, { "epoch": 2.148489503328213, "grad_norm": 0.207370363379193, "learning_rate": 1.5762518968133535e-05, "loss": 0.2194, "num_tokens": 1201195655.0, "step": 6295 }, { "epoch": 2.148830858508278, "grad_norm": 0.2493853400459528, "learning_rate": 1.5756196256954982e-05, "loss": 0.208, "num_tokens": 1201372337.0, "step": 6296 }, { "epoch": 2.1491722136883427, "grad_norm": 0.2500081053697676, "learning_rate": 1.574987354577643e-05, "loss": 0.2004, "num_tokens": 1201537350.0, "step": 6297 }, { "epoch": 2.1495135688684077, "grad_norm": 0.23538716039888513, "learning_rate": 1.5743550834597876e-05, "loss": 0.2074, "num_tokens": 1201719283.0, "step": 6298 }, { "epoch": 2.1498549240484723, "grad_norm": 0.21808122548665856, "learning_rate": 1.5737228123419323e-05, "loss": 0.2313, "num_tokens": 1201932374.0, "step": 6299 }, { "epoch": 2.1501962792285374, "grad_norm": 0.233617506506856, "learning_rate": 1.573090541224077e-05, "loss": 0.2651, "num_tokens": 1202154123.0, "step": 6300 }, { "epoch": 2.150537634408602, "grad_norm": 0.24644738931119844, "learning_rate": 1.5724582701062217e-05, "loss": 0.2107, "num_tokens": 1202319727.0, "step": 6301 }, { "epoch": 2.150878989588667, "grad_norm": 0.24318222893646074, "learning_rate": 1.5718259989883664e-05, "loss": 0.2564, "num_tokens": 1202511125.0, "step": 6302 }, { "epoch": 2.1512203447687317, "grad_norm": 0.2506777395394145, "learning_rate": 1.571193727870511e-05, "loss": 0.2502, "num_tokens": 1202709112.0, "step": 6303 }, { "epoch": 2.1515616999487968, "grad_norm": 0.2157225786539978, "learning_rate": 1.5705614567526555e-05, "loss": 0.2466, "num_tokens": 1202949155.0, "step": 6304 }, { "epoch": 2.1519030551288614, "grad_norm": 0.23228916563137467, "learning_rate": 1.5699291856348e-05, "loss": 0.2283, "num_tokens": 1203121966.0, "step": 6305 }, { "epoch": 2.1522444103089264, "grad_norm": 0.24756773050685987, "learning_rate": 1.5692969145169452e-05, "loss": 0.2274, "num_tokens": 1203320687.0, "step": 6306 }, { "epoch": 2.1525857654889915, "grad_norm": 0.24137148113183468, "learning_rate": 1.5686646433990896e-05, "loss": 0.2458, "num_tokens": 1203514236.0, "step": 6307 }, { "epoch": 2.152927120669056, "grad_norm": 0.21081004607036233, "learning_rate": 1.5680323722812343e-05, "loss": 0.2271, "num_tokens": 1203713127.0, "step": 6308 }, { "epoch": 2.153268475849121, "grad_norm": 0.22726183523286383, "learning_rate": 1.567400101163379e-05, "loss": 0.2373, "num_tokens": 1203924660.0, "step": 6309 }, { "epoch": 2.153609831029186, "grad_norm": 0.285986162808583, "learning_rate": 1.5667678300455237e-05, "loss": 0.2192, "num_tokens": 1204072574.0, "step": 6310 }, { "epoch": 2.153951186209251, "grad_norm": 0.23888723482396707, "learning_rate": 1.5661355589276684e-05, "loss": 0.2465, "num_tokens": 1204270115.0, "step": 6311 }, { "epoch": 2.1542925413893155, "grad_norm": 0.23920190126364801, "learning_rate": 1.565503287809813e-05, "loss": 0.2491, "num_tokens": 1204486141.0, "step": 6312 }, { "epoch": 2.1546338965693805, "grad_norm": 0.4439981930027745, "learning_rate": 1.5648710166919574e-05, "loss": 0.2434, "num_tokens": 1204674873.0, "step": 6313 }, { "epoch": 2.154975251749445, "grad_norm": 0.20779488653629197, "learning_rate": 1.564238745574102e-05, "loss": 0.234, "num_tokens": 1204916939.0, "step": 6314 }, { "epoch": 2.15531660692951, "grad_norm": 0.23548294436585443, "learning_rate": 1.563606474456247e-05, "loss": 0.2534, "num_tokens": 1205130816.0, "step": 6315 }, { "epoch": 2.155657962109575, "grad_norm": 0.2360304874540804, "learning_rate": 1.5629742033383915e-05, "loss": 0.2254, "num_tokens": 1205334628.0, "step": 6316 }, { "epoch": 2.15599931728964, "grad_norm": 0.23855133498575684, "learning_rate": 1.5623419322205362e-05, "loss": 0.2199, "num_tokens": 1205502795.0, "step": 6317 }, { "epoch": 2.156340672469705, "grad_norm": 0.2421399793804712, "learning_rate": 1.561709661102681e-05, "loss": 0.2318, "num_tokens": 1205699357.0, "step": 6318 }, { "epoch": 2.1566820276497696, "grad_norm": 0.2386383951670224, "learning_rate": 1.5610773899848256e-05, "loss": 0.2353, "num_tokens": 1205876579.0, "step": 6319 }, { "epoch": 2.1570233828298346, "grad_norm": 0.22701753172940595, "learning_rate": 1.5604451188669703e-05, "loss": 0.2074, "num_tokens": 1206061583.0, "step": 6320 }, { "epoch": 2.1573647380098993, "grad_norm": 0.2525464517795824, "learning_rate": 1.559812847749115e-05, "loss": 0.2269, "num_tokens": 1206238960.0, "step": 6321 }, { "epoch": 2.1577060931899643, "grad_norm": 0.2546591201393261, "learning_rate": 1.5591805766312594e-05, "loss": 0.2245, "num_tokens": 1206405086.0, "step": 6322 }, { "epoch": 2.158047448370029, "grad_norm": 0.2423507006174903, "learning_rate": 1.5585483055134044e-05, "loss": 0.2137, "num_tokens": 1206598129.0, "step": 6323 }, { "epoch": 2.158388803550094, "grad_norm": 0.24177332875136584, "learning_rate": 1.557916034395549e-05, "loss": 0.2165, "num_tokens": 1206754118.0, "step": 6324 }, { "epoch": 2.1587301587301586, "grad_norm": 0.23266990820910613, "learning_rate": 1.5572837632776935e-05, "loss": 0.2511, "num_tokens": 1206959907.0, "step": 6325 }, { "epoch": 2.1590715139102237, "grad_norm": 0.3006640998588078, "learning_rate": 1.556651492159838e-05, "loss": 0.2295, "num_tokens": 1207168923.0, "step": 6326 }, { "epoch": 2.1594128690902883, "grad_norm": 0.2346373418163303, "learning_rate": 1.556019221041983e-05, "loss": 0.2217, "num_tokens": 1207332457.0, "step": 6327 }, { "epoch": 2.1597542242703534, "grad_norm": 0.2430727842976598, "learning_rate": 1.5553869499241275e-05, "loss": 0.262, "num_tokens": 1207547660.0, "step": 6328 }, { "epoch": 2.160095579450418, "grad_norm": 0.22792288898051266, "learning_rate": 1.5547546788062722e-05, "loss": 0.2507, "num_tokens": 1207763069.0, "step": 6329 }, { "epoch": 2.160436934630483, "grad_norm": 0.22837792799593726, "learning_rate": 1.554122407688417e-05, "loss": 0.2395, "num_tokens": 1207995909.0, "step": 6330 }, { "epoch": 2.1607782898105476, "grad_norm": 0.22750235071534947, "learning_rate": 1.5534901365705613e-05, "loss": 0.2231, "num_tokens": 1208205725.0, "step": 6331 }, { "epoch": 2.1611196449906127, "grad_norm": 0.2302460581442118, "learning_rate": 1.5528578654527063e-05, "loss": 0.2202, "num_tokens": 1208396253.0, "step": 6332 }, { "epoch": 2.1614610001706778, "grad_norm": 0.2330959632864122, "learning_rate": 1.552225594334851e-05, "loss": 0.2436, "num_tokens": 1208590791.0, "step": 6333 }, { "epoch": 2.1618023553507424, "grad_norm": 0.22099993307474167, "learning_rate": 1.5515933232169954e-05, "loss": 0.2363, "num_tokens": 1208791092.0, "step": 6334 }, { "epoch": 2.1621437105308074, "grad_norm": 0.24513555491819874, "learning_rate": 1.55096105209914e-05, "loss": 0.2142, "num_tokens": 1208953488.0, "step": 6335 }, { "epoch": 2.162485065710872, "grad_norm": 0.2551688503215512, "learning_rate": 1.550328780981285e-05, "loss": 0.2218, "num_tokens": 1209116047.0, "step": 6336 }, { "epoch": 2.162826420890937, "grad_norm": 0.22564306234752754, "learning_rate": 1.5496965098634295e-05, "loss": 0.236, "num_tokens": 1209318813.0, "step": 6337 }, { "epoch": 2.1631677760710017, "grad_norm": 0.26287256992029845, "learning_rate": 1.5490642387455742e-05, "loss": 0.215, "num_tokens": 1209517506.0, "step": 6338 }, { "epoch": 2.163509131251067, "grad_norm": 0.23074286805796915, "learning_rate": 1.548431967627719e-05, "loss": 0.2304, "num_tokens": 1209717997.0, "step": 6339 }, { "epoch": 2.1638504864311314, "grad_norm": 0.2632124645663727, "learning_rate": 1.5477996965098636e-05, "loss": 0.205, "num_tokens": 1209866489.0, "step": 6340 }, { "epoch": 2.1641918416111965, "grad_norm": 0.2568749168766029, "learning_rate": 1.5471674253920083e-05, "loss": 0.2371, "num_tokens": 1210054391.0, "step": 6341 }, { "epoch": 2.164533196791261, "grad_norm": 0.23737065839161134, "learning_rate": 1.546535154274153e-05, "loss": 0.2059, "num_tokens": 1210222468.0, "step": 6342 }, { "epoch": 2.164874551971326, "grad_norm": 0.2742357910302263, "learning_rate": 1.5459028831562973e-05, "loss": 0.2508, "num_tokens": 1210394941.0, "step": 6343 }, { "epoch": 2.1652159071513912, "grad_norm": 0.25156450555608173, "learning_rate": 1.545270612038442e-05, "loss": 0.2369, "num_tokens": 1210559200.0, "step": 6344 }, { "epoch": 2.165557262331456, "grad_norm": 0.20695715563469821, "learning_rate": 1.544638340920587e-05, "loss": 0.2412, "num_tokens": 1210802232.0, "step": 6345 }, { "epoch": 2.165898617511521, "grad_norm": 0.23443842472384757, "learning_rate": 1.5440060698027314e-05, "loss": 0.2182, "num_tokens": 1210980077.0, "step": 6346 }, { "epoch": 2.1662399726915855, "grad_norm": 0.23753807728539397, "learning_rate": 1.543373798684876e-05, "loss": 0.216, "num_tokens": 1211173020.0, "step": 6347 }, { "epoch": 2.1665813278716506, "grad_norm": 0.24573940601646438, "learning_rate": 1.542741527567021e-05, "loss": 0.2398, "num_tokens": 1211388690.0, "step": 6348 }, { "epoch": 2.166922683051715, "grad_norm": 0.24670988458327966, "learning_rate": 1.5421092564491655e-05, "loss": 0.2572, "num_tokens": 1211580096.0, "step": 6349 }, { "epoch": 2.1672640382317803, "grad_norm": 0.23542218605201526, "learning_rate": 1.5414769853313102e-05, "loss": 0.245, "num_tokens": 1211780768.0, "step": 6350 }, { "epoch": 2.167605393411845, "grad_norm": 0.22348309376461373, "learning_rate": 1.5408447142134546e-05, "loss": 0.2165, "num_tokens": 1211969691.0, "step": 6351 }, { "epoch": 2.16794674859191, "grad_norm": 0.27452713802700673, "learning_rate": 1.5402124430955993e-05, "loss": 0.2161, "num_tokens": 1212114442.0, "step": 6352 }, { "epoch": 2.1682881037719746, "grad_norm": 0.24891793304471418, "learning_rate": 1.5395801719777443e-05, "loss": 0.2244, "num_tokens": 1212302299.0, "step": 6353 }, { "epoch": 2.1686294589520396, "grad_norm": 0.24056305452019136, "learning_rate": 1.5389479008598887e-05, "loss": 0.217, "num_tokens": 1212488267.0, "step": 6354 }, { "epoch": 2.1689708141321047, "grad_norm": 0.2458820693932368, "learning_rate": 1.5383156297420334e-05, "loss": 0.2185, "num_tokens": 1212674586.0, "step": 6355 }, { "epoch": 2.1693121693121693, "grad_norm": 0.259113603798691, "learning_rate": 1.537683358624178e-05, "loss": 0.2338, "num_tokens": 1212859627.0, "step": 6356 }, { "epoch": 2.1696535244922344, "grad_norm": 0.2301538049707572, "learning_rate": 1.5370510875063228e-05, "loss": 0.2649, "num_tokens": 1213110622.0, "step": 6357 }, { "epoch": 2.169994879672299, "grad_norm": 0.2859946757826733, "learning_rate": 1.5364188163884675e-05, "loss": 0.2667, "num_tokens": 1213302569.0, "step": 6358 }, { "epoch": 2.170336234852364, "grad_norm": 0.23265675591712678, "learning_rate": 1.5357865452706122e-05, "loss": 0.2353, "num_tokens": 1213506526.0, "step": 6359 }, { "epoch": 2.1706775900324287, "grad_norm": 0.24646174112918628, "learning_rate": 1.5351542741527565e-05, "loss": 0.2202, "num_tokens": 1213671274.0, "step": 6360 }, { "epoch": 2.1710189452124937, "grad_norm": 0.22615465243780108, "learning_rate": 1.5345220030349012e-05, "loss": 0.2199, "num_tokens": 1213860326.0, "step": 6361 }, { "epoch": 2.1713603003925583, "grad_norm": 0.25021061201737504, "learning_rate": 1.5338897319170463e-05, "loss": 0.2133, "num_tokens": 1214029016.0, "step": 6362 }, { "epoch": 2.1717016555726234, "grad_norm": 0.2314958496004651, "learning_rate": 1.5332574607991906e-05, "loss": 0.2603, "num_tokens": 1214253711.0, "step": 6363 }, { "epoch": 2.172043010752688, "grad_norm": 0.22892742156206014, "learning_rate": 1.5326251896813353e-05, "loss": 0.2313, "num_tokens": 1214460018.0, "step": 6364 }, { "epoch": 2.172384365932753, "grad_norm": 0.24870078195791517, "learning_rate": 1.53199291856348e-05, "loss": 0.2278, "num_tokens": 1214645462.0, "step": 6365 }, { "epoch": 2.1727257211128177, "grad_norm": 0.2515615466545834, "learning_rate": 1.5313606474456247e-05, "loss": 0.2374, "num_tokens": 1214842038.0, "step": 6366 }, { "epoch": 2.1730670762928828, "grad_norm": 0.24851538929923722, "learning_rate": 1.5307283763277694e-05, "loss": 0.2647, "num_tokens": 1215051882.0, "step": 6367 }, { "epoch": 2.1734084314729474, "grad_norm": 0.2364338131482246, "learning_rate": 1.530096105209914e-05, "loss": 0.2261, "num_tokens": 1215256286.0, "step": 6368 }, { "epoch": 2.1737497866530124, "grad_norm": 0.22437506947337904, "learning_rate": 1.5294638340920585e-05, "loss": 0.2254, "num_tokens": 1215449089.0, "step": 6369 }, { "epoch": 2.1740911418330775, "grad_norm": 0.28487669548953565, "learning_rate": 1.5288315629742035e-05, "loss": 0.23, "num_tokens": 1215602533.0, "step": 6370 }, { "epoch": 2.174432497013142, "grad_norm": 0.2759007799748349, "learning_rate": 1.5281992918563482e-05, "loss": 0.2295, "num_tokens": 1215781347.0, "step": 6371 }, { "epoch": 2.174773852193207, "grad_norm": 0.253232348119423, "learning_rate": 1.5275670207384926e-05, "loss": 0.2251, "num_tokens": 1215952040.0, "step": 6372 }, { "epoch": 2.175115207373272, "grad_norm": 0.2378639099120982, "learning_rate": 1.5269347496206373e-05, "loss": 0.219, "num_tokens": 1216125356.0, "step": 6373 }, { "epoch": 2.175456562553337, "grad_norm": 0.219470599257248, "learning_rate": 1.526302478502782e-05, "loss": 0.2167, "num_tokens": 1216333461.0, "step": 6374 }, { "epoch": 2.1757979177334015, "grad_norm": 0.2456177708557333, "learning_rate": 1.5256702073849267e-05, "loss": 0.2245, "num_tokens": 1216514997.0, "step": 6375 }, { "epoch": 2.1761392729134665, "grad_norm": 0.24256321077691034, "learning_rate": 1.5250379362670714e-05, "loss": 0.2237, "num_tokens": 1216694724.0, "step": 6376 }, { "epoch": 2.176480628093531, "grad_norm": 0.2130427411981663, "learning_rate": 1.524405665149216e-05, "loss": 0.2295, "num_tokens": 1216898273.0, "step": 6377 }, { "epoch": 2.176821983273596, "grad_norm": 0.25602064406349206, "learning_rate": 1.5237733940313606e-05, "loss": 0.2544, "num_tokens": 1217088149.0, "step": 6378 }, { "epoch": 2.177163338453661, "grad_norm": 0.24998194810796415, "learning_rate": 1.5231411229135053e-05, "loss": 0.2557, "num_tokens": 1217264042.0, "step": 6379 }, { "epoch": 2.177504693633726, "grad_norm": 0.24499386418599658, "learning_rate": 1.5225088517956502e-05, "loss": 0.2204, "num_tokens": 1217447941.0, "step": 6380 }, { "epoch": 2.177846048813791, "grad_norm": 0.26596114631671075, "learning_rate": 1.5218765806777945e-05, "loss": 0.2511, "num_tokens": 1217629260.0, "step": 6381 }, { "epoch": 2.1781874039938556, "grad_norm": 0.24245023662269974, "learning_rate": 1.5212443095599394e-05, "loss": 0.2532, "num_tokens": 1217824604.0, "step": 6382 }, { "epoch": 2.1785287591739206, "grad_norm": 0.22391570909225408, "learning_rate": 1.5206120384420841e-05, "loss": 0.2146, "num_tokens": 1218006997.0, "step": 6383 }, { "epoch": 2.1788701143539853, "grad_norm": 0.24637172141239033, "learning_rate": 1.5199797673242286e-05, "loss": 0.2185, "num_tokens": 1218172524.0, "step": 6384 }, { "epoch": 2.1792114695340503, "grad_norm": 0.22728404362799193, "learning_rate": 1.5193474962063733e-05, "loss": 0.2522, "num_tokens": 1218395865.0, "step": 6385 }, { "epoch": 2.179552824714115, "grad_norm": 0.22954925990423342, "learning_rate": 1.518715225088518e-05, "loss": 0.2288, "num_tokens": 1218608269.0, "step": 6386 }, { "epoch": 2.17989417989418, "grad_norm": 0.22891511391650565, "learning_rate": 1.5180829539706625e-05, "loss": 0.2414, "num_tokens": 1218810692.0, "step": 6387 }, { "epoch": 2.1802355350742446, "grad_norm": 0.24935403083968527, "learning_rate": 1.5174506828528074e-05, "loss": 0.2718, "num_tokens": 1219009072.0, "step": 6388 }, { "epoch": 2.1805768902543097, "grad_norm": 0.2325272318347388, "learning_rate": 1.5168184117349521e-05, "loss": 0.2149, "num_tokens": 1219212219.0, "step": 6389 }, { "epoch": 2.1809182454343743, "grad_norm": 0.25020013453118706, "learning_rate": 1.5161861406170966e-05, "loss": 0.2308, "num_tokens": 1219395754.0, "step": 6390 }, { "epoch": 2.1812596006144394, "grad_norm": 0.2130505609949678, "learning_rate": 1.5155538694992413e-05, "loss": 0.2185, "num_tokens": 1219598401.0, "step": 6391 }, { "epoch": 2.1816009557945044, "grad_norm": 0.22404370076699687, "learning_rate": 1.514921598381386e-05, "loss": 0.2195, "num_tokens": 1219805770.0, "step": 6392 }, { "epoch": 2.181942310974569, "grad_norm": 0.2821794607042456, "learning_rate": 1.5142893272635306e-05, "loss": 0.2353, "num_tokens": 1219986004.0, "step": 6393 }, { "epoch": 2.182283666154634, "grad_norm": 0.24911561741946142, "learning_rate": 1.5136570561456753e-05, "loss": 0.25, "num_tokens": 1220187191.0, "step": 6394 }, { "epoch": 2.1826250213346987, "grad_norm": 0.22504585979687328, "learning_rate": 1.5130247850278201e-05, "loss": 0.2161, "num_tokens": 1220388051.0, "step": 6395 }, { "epoch": 2.1829663765147638, "grad_norm": 0.2373838823235707, "learning_rate": 1.5123925139099645e-05, "loss": 0.2116, "num_tokens": 1220564008.0, "step": 6396 }, { "epoch": 2.1833077316948284, "grad_norm": 0.2311229222873269, "learning_rate": 1.5117602427921094e-05, "loss": 0.2017, "num_tokens": 1220742045.0, "step": 6397 }, { "epoch": 2.1836490868748935, "grad_norm": 0.2841286921220819, "learning_rate": 1.511127971674254e-05, "loss": 0.2271, "num_tokens": 1220893333.0, "step": 6398 }, { "epoch": 2.183990442054958, "grad_norm": 0.2399045368794875, "learning_rate": 1.5104957005563986e-05, "loss": 0.2375, "num_tokens": 1221100143.0, "step": 6399 }, { "epoch": 2.184331797235023, "grad_norm": 0.23875798005271695, "learning_rate": 1.5098634294385433e-05, "loss": 0.2487, "num_tokens": 1221317095.0, "step": 6400 }, { "epoch": 2.1846731524150877, "grad_norm": 0.22738907454656435, "learning_rate": 1.509231158320688e-05, "loss": 0.1818, "num_tokens": 1221475656.0, "step": 6401 }, { "epoch": 2.185014507595153, "grad_norm": 0.2389374118789683, "learning_rate": 1.5085988872028325e-05, "loss": 0.24, "num_tokens": 1221675315.0, "step": 6402 }, { "epoch": 2.1853558627752174, "grad_norm": 0.2454678119410655, "learning_rate": 1.5079666160849774e-05, "loss": 0.2332, "num_tokens": 1221861479.0, "step": 6403 }, { "epoch": 2.1856972179552825, "grad_norm": 0.23412534627623596, "learning_rate": 1.507334344967122e-05, "loss": 0.2157, "num_tokens": 1222048954.0, "step": 6404 }, { "epoch": 2.186038573135347, "grad_norm": 0.24249920342220943, "learning_rate": 1.5067020738492666e-05, "loss": 0.2116, "num_tokens": 1222200015.0, "step": 6405 }, { "epoch": 2.186379928315412, "grad_norm": 0.247936302349051, "learning_rate": 1.5060698027314113e-05, "loss": 0.2045, "num_tokens": 1222369800.0, "step": 6406 }, { "epoch": 2.1867212834954772, "grad_norm": 0.2348951000783045, "learning_rate": 1.505437531613556e-05, "loss": 0.2144, "num_tokens": 1222548061.0, "step": 6407 }, { "epoch": 2.187062638675542, "grad_norm": 0.260737973552973, "learning_rate": 1.5048052604957005e-05, "loss": 0.2512, "num_tokens": 1222752674.0, "step": 6408 }, { "epoch": 2.187403993855607, "grad_norm": 0.2508163410090243, "learning_rate": 1.5041729893778452e-05, "loss": 0.2116, "num_tokens": 1222924136.0, "step": 6409 }, { "epoch": 2.1877453490356715, "grad_norm": 0.2309477559609958, "learning_rate": 1.5035407182599901e-05, "loss": 0.2349, "num_tokens": 1223126330.0, "step": 6410 }, { "epoch": 2.1880867042157366, "grad_norm": 0.21494063716368872, "learning_rate": 1.5029084471421345e-05, "loss": 0.2254, "num_tokens": 1223348651.0, "step": 6411 }, { "epoch": 2.188428059395801, "grad_norm": 0.2357179212800759, "learning_rate": 1.5022761760242793e-05, "loss": 0.2403, "num_tokens": 1223535361.0, "step": 6412 }, { "epoch": 2.1887694145758663, "grad_norm": 0.24021015719268113, "learning_rate": 1.501643904906424e-05, "loss": 0.2325, "num_tokens": 1223720296.0, "step": 6413 }, { "epoch": 2.189110769755931, "grad_norm": 0.21693343458433115, "learning_rate": 1.5010116337885686e-05, "loss": 0.2177, "num_tokens": 1223914512.0, "step": 6414 }, { "epoch": 2.189452124935996, "grad_norm": 0.22950491298479828, "learning_rate": 1.5003793626707133e-05, "loss": 0.2377, "num_tokens": 1224115426.0, "step": 6415 }, { "epoch": 2.1897934801160606, "grad_norm": 0.2660166859019165, "learning_rate": 1.499747091552858e-05, "loss": 0.2211, "num_tokens": 1224271344.0, "step": 6416 }, { "epoch": 2.1901348352961256, "grad_norm": 0.22764181845363124, "learning_rate": 1.4991148204350025e-05, "loss": 0.2567, "num_tokens": 1224495392.0, "step": 6417 }, { "epoch": 2.1904761904761907, "grad_norm": 0.2498819936368491, "learning_rate": 1.4984825493171473e-05, "loss": 0.2281, "num_tokens": 1224666441.0, "step": 6418 }, { "epoch": 2.1908175456562553, "grad_norm": 0.26056219209602005, "learning_rate": 1.497850278199292e-05, "loss": 0.2003, "num_tokens": 1224815000.0, "step": 6419 }, { "epoch": 2.1911589008363204, "grad_norm": 0.21504507904012432, "learning_rate": 1.4972180070814366e-05, "loss": 0.2117, "num_tokens": 1225011441.0, "step": 6420 }, { "epoch": 2.191500256016385, "grad_norm": 0.245398818796118, "learning_rate": 1.4965857359635813e-05, "loss": 0.249, "num_tokens": 1225205507.0, "step": 6421 }, { "epoch": 2.19184161119645, "grad_norm": 0.21968953005575426, "learning_rate": 1.495953464845726e-05, "loss": 0.2303, "num_tokens": 1225429206.0, "step": 6422 }, { "epoch": 2.1921829663765147, "grad_norm": 0.21619314539703868, "learning_rate": 1.4953211937278705e-05, "loss": 0.224, "num_tokens": 1225635251.0, "step": 6423 }, { "epoch": 2.1925243215565797, "grad_norm": 0.25513096373156263, "learning_rate": 1.4946889226100152e-05, "loss": 0.2551, "num_tokens": 1225824887.0, "step": 6424 }, { "epoch": 2.1928656767366443, "grad_norm": 0.24753999230327245, "learning_rate": 1.49405665149216e-05, "loss": 0.2177, "num_tokens": 1225994939.0, "step": 6425 }, { "epoch": 2.1932070319167094, "grad_norm": 0.22869581737886266, "learning_rate": 1.4934243803743044e-05, "loss": 0.2316, "num_tokens": 1226194798.0, "step": 6426 }, { "epoch": 2.193548387096774, "grad_norm": 0.2281810503215017, "learning_rate": 1.4927921092564493e-05, "loss": 0.2393, "num_tokens": 1226391607.0, "step": 6427 }, { "epoch": 2.193889742276839, "grad_norm": 0.24007373677694185, "learning_rate": 1.492159838138594e-05, "loss": 0.198, "num_tokens": 1226559188.0, "step": 6428 }, { "epoch": 2.1942310974569037, "grad_norm": 0.24355185183095968, "learning_rate": 1.4915275670207385e-05, "loss": 0.2281, "num_tokens": 1226739119.0, "step": 6429 }, { "epoch": 2.1945724526369688, "grad_norm": 0.2726190891641209, "learning_rate": 1.4908952959028832e-05, "loss": 0.2586, "num_tokens": 1226904780.0, "step": 6430 }, { "epoch": 2.194913807817034, "grad_norm": 0.24241432174670913, "learning_rate": 1.490263024785028e-05, "loss": 0.2217, "num_tokens": 1227111718.0, "step": 6431 }, { "epoch": 2.1952551629970984, "grad_norm": 0.23050090257888353, "learning_rate": 1.4896307536671724e-05, "loss": 0.2461, "num_tokens": 1227324747.0, "step": 6432 }, { "epoch": 2.1955965181771635, "grad_norm": 0.2786475725092725, "learning_rate": 1.4889984825493173e-05, "loss": 0.2254, "num_tokens": 1227511611.0, "step": 6433 }, { "epoch": 2.195937873357228, "grad_norm": 0.23360820195296284, "learning_rate": 1.488366211431462e-05, "loss": 0.2366, "num_tokens": 1227705927.0, "step": 6434 }, { "epoch": 2.196279228537293, "grad_norm": 0.23452566879660422, "learning_rate": 1.4877339403136065e-05, "loss": 0.2001, "num_tokens": 1227874193.0, "step": 6435 }, { "epoch": 2.196620583717358, "grad_norm": 0.2405972664369677, "learning_rate": 1.4871016691957512e-05, "loss": 0.2266, "num_tokens": 1228069462.0, "step": 6436 }, { "epoch": 2.196961938897423, "grad_norm": 0.23898180355322088, "learning_rate": 1.486469398077896e-05, "loss": 0.243, "num_tokens": 1228245778.0, "step": 6437 }, { "epoch": 2.1973032940774875, "grad_norm": 0.2426331004388483, "learning_rate": 1.4858371269600405e-05, "loss": 0.246, "num_tokens": 1228461429.0, "step": 6438 }, { "epoch": 2.1976446492575525, "grad_norm": 0.23309171793552266, "learning_rate": 1.4852048558421852e-05, "loss": 0.2344, "num_tokens": 1228659591.0, "step": 6439 }, { "epoch": 2.197986004437617, "grad_norm": 0.216893731732529, "learning_rate": 1.48457258472433e-05, "loss": 0.2488, "num_tokens": 1228882253.0, "step": 6440 }, { "epoch": 2.198327359617682, "grad_norm": 0.22883645477510786, "learning_rate": 1.4839403136064744e-05, "loss": 0.2383, "num_tokens": 1229081434.0, "step": 6441 }, { "epoch": 2.198668714797747, "grad_norm": 0.2445489809320483, "learning_rate": 1.4833080424886193e-05, "loss": 0.2063, "num_tokens": 1229248515.0, "step": 6442 }, { "epoch": 2.199010069977812, "grad_norm": 0.2509889225621226, "learning_rate": 1.482675771370764e-05, "loss": 0.2321, "num_tokens": 1229421072.0, "step": 6443 }, { "epoch": 2.199351425157877, "grad_norm": 0.22957948775192497, "learning_rate": 1.4820435002529085e-05, "loss": 0.2155, "num_tokens": 1229605175.0, "step": 6444 }, { "epoch": 2.1996927803379416, "grad_norm": 0.26909889782650775, "learning_rate": 1.4814112291350532e-05, "loss": 0.2374, "num_tokens": 1229756299.0, "step": 6445 }, { "epoch": 2.2000341355180066, "grad_norm": 0.25485416487818713, "learning_rate": 1.4807789580171979e-05, "loss": 0.2115, "num_tokens": 1229911490.0, "step": 6446 }, { "epoch": 2.2003754906980713, "grad_norm": 0.22767945794108974, "learning_rate": 1.4801466868993424e-05, "loss": 0.2149, "num_tokens": 1230084472.0, "step": 6447 }, { "epoch": 2.2007168458781363, "grad_norm": 0.24593430655690865, "learning_rate": 1.4795144157814871e-05, "loss": 0.2502, "num_tokens": 1230301425.0, "step": 6448 }, { "epoch": 2.201058201058201, "grad_norm": 0.24161187800702164, "learning_rate": 1.478882144663632e-05, "loss": 0.221, "num_tokens": 1230464813.0, "step": 6449 }, { "epoch": 2.201399556238266, "grad_norm": 0.23376534028095186, "learning_rate": 1.4782498735457765e-05, "loss": 0.206, "num_tokens": 1230626778.0, "step": 6450 }, { "epoch": 2.2017409114183306, "grad_norm": 0.23339963210268166, "learning_rate": 1.4776176024279212e-05, "loss": 0.2418, "num_tokens": 1230830777.0, "step": 6451 }, { "epoch": 2.2020822665983957, "grad_norm": 0.21996823244741182, "learning_rate": 1.4769853313100659e-05, "loss": 0.2496, "num_tokens": 1231066319.0, "step": 6452 }, { "epoch": 2.2024236217784603, "grad_norm": 0.2608324255220903, "learning_rate": 1.4763530601922104e-05, "loss": 0.2169, "num_tokens": 1231230796.0, "step": 6453 }, { "epoch": 2.2027649769585254, "grad_norm": 0.23909883703110332, "learning_rate": 1.4757207890743551e-05, "loss": 0.2299, "num_tokens": 1231431911.0, "step": 6454 }, { "epoch": 2.2031063321385904, "grad_norm": 0.2567989338398084, "learning_rate": 1.4750885179565e-05, "loss": 0.2737, "num_tokens": 1231641946.0, "step": 6455 }, { "epoch": 2.203447687318655, "grad_norm": 0.23896374458372327, "learning_rate": 1.4744562468386444e-05, "loss": 0.2245, "num_tokens": 1231844263.0, "step": 6456 }, { "epoch": 2.20378904249872, "grad_norm": 0.23598664140389072, "learning_rate": 1.4738239757207892e-05, "loss": 0.2448, "num_tokens": 1232066271.0, "step": 6457 }, { "epoch": 2.2041303976787847, "grad_norm": 0.25575842126366743, "learning_rate": 1.473191704602934e-05, "loss": 0.2438, "num_tokens": 1232256221.0, "step": 6458 }, { "epoch": 2.2044717528588498, "grad_norm": 0.2271255972318094, "learning_rate": 1.4725594334850785e-05, "loss": 0.2416, "num_tokens": 1232451972.0, "step": 6459 }, { "epoch": 2.2048131080389144, "grad_norm": 0.24288828459054176, "learning_rate": 1.4719271623672232e-05, "loss": 0.2037, "num_tokens": 1232608199.0, "step": 6460 }, { "epoch": 2.2051544632189795, "grad_norm": 0.233134253670076, "learning_rate": 1.4712948912493679e-05, "loss": 0.2148, "num_tokens": 1232797589.0, "step": 6461 }, { "epoch": 2.205495818399044, "grad_norm": 0.2288165694027643, "learning_rate": 1.4706626201315124e-05, "loss": 0.2411, "num_tokens": 1233021855.0, "step": 6462 }, { "epoch": 2.205837173579109, "grad_norm": 0.24502972756056005, "learning_rate": 1.470030349013657e-05, "loss": 0.2239, "num_tokens": 1233204918.0, "step": 6463 }, { "epoch": 2.2061785287591738, "grad_norm": 0.2603344897160217, "learning_rate": 1.469398077895802e-05, "loss": 0.2173, "num_tokens": 1233381796.0, "step": 6464 }, { "epoch": 2.206519883939239, "grad_norm": 0.21696784870714855, "learning_rate": 1.4687658067779465e-05, "loss": 0.2255, "num_tokens": 1233614680.0, "step": 6465 }, { "epoch": 2.2068612391193034, "grad_norm": 0.2598234046681322, "learning_rate": 1.4681335356600912e-05, "loss": 0.2367, "num_tokens": 1233795400.0, "step": 6466 }, { "epoch": 2.2072025942993685, "grad_norm": 0.23812209427745837, "learning_rate": 1.4675012645422359e-05, "loss": 0.2329, "num_tokens": 1233978359.0, "step": 6467 }, { "epoch": 2.2075439494794336, "grad_norm": 0.24385660186948702, "learning_rate": 1.4668689934243804e-05, "loss": 0.2278, "num_tokens": 1234167195.0, "step": 6468 }, { "epoch": 2.207885304659498, "grad_norm": 0.23138352234234477, "learning_rate": 1.4662367223065251e-05, "loss": 0.2368, "num_tokens": 1234371949.0, "step": 6469 }, { "epoch": 2.2082266598395632, "grad_norm": 0.2645968393865548, "learning_rate": 1.46560445118867e-05, "loss": 0.2185, "num_tokens": 1234523904.0, "step": 6470 }, { "epoch": 2.208568015019628, "grad_norm": 0.24881274814240728, "learning_rate": 1.4649721800708143e-05, "loss": 0.2527, "num_tokens": 1234710802.0, "step": 6471 }, { "epoch": 2.208909370199693, "grad_norm": 0.23416778083162443, "learning_rate": 1.4643399089529592e-05, "loss": 0.2247, "num_tokens": 1234907377.0, "step": 6472 }, { "epoch": 2.2092507253797575, "grad_norm": 0.22518830683552926, "learning_rate": 1.4637076378351039e-05, "loss": 0.2102, "num_tokens": 1235098916.0, "step": 6473 }, { "epoch": 2.2095920805598226, "grad_norm": 0.25477658620625454, "learning_rate": 1.4630753667172484e-05, "loss": 0.2488, "num_tokens": 1235309449.0, "step": 6474 }, { "epoch": 2.209933435739887, "grad_norm": 0.2287591242713583, "learning_rate": 1.4624430955993931e-05, "loss": 0.2294, "num_tokens": 1235514097.0, "step": 6475 }, { "epoch": 2.2102747909199523, "grad_norm": 0.24614283891934707, "learning_rate": 1.4618108244815378e-05, "loss": 0.23, "num_tokens": 1235711049.0, "step": 6476 }, { "epoch": 2.210616146100017, "grad_norm": 0.2627604179862127, "learning_rate": 1.4611785533636823e-05, "loss": 0.2154, "num_tokens": 1235870417.0, "step": 6477 }, { "epoch": 2.210957501280082, "grad_norm": 0.2595989209786766, "learning_rate": 1.460546282245827e-05, "loss": 0.2293, "num_tokens": 1236042795.0, "step": 6478 }, { "epoch": 2.2112988564601466, "grad_norm": 0.22648252106569522, "learning_rate": 1.4599140111279719e-05, "loss": 0.2118, "num_tokens": 1236228437.0, "step": 6479 }, { "epoch": 2.2116402116402116, "grad_norm": 0.2283428764775018, "learning_rate": 1.4592817400101164e-05, "loss": 0.2023, "num_tokens": 1236410916.0, "step": 6480 }, { "epoch": 2.2119815668202767, "grad_norm": 0.25088960169623786, "learning_rate": 1.4586494688922611e-05, "loss": 0.2226, "num_tokens": 1236581122.0, "step": 6481 }, { "epoch": 2.2123229220003413, "grad_norm": 0.23847701768552992, "learning_rate": 1.4580171977744057e-05, "loss": 0.2278, "num_tokens": 1236767626.0, "step": 6482 }, { "epoch": 2.2126642771804064, "grad_norm": 0.23877377737575392, "learning_rate": 1.4573849266565504e-05, "loss": 0.2413, "num_tokens": 1236986332.0, "step": 6483 }, { "epoch": 2.213005632360471, "grad_norm": 0.24541369491166184, "learning_rate": 1.456752655538695e-05, "loss": 0.2654, "num_tokens": 1237193167.0, "step": 6484 }, { "epoch": 2.213346987540536, "grad_norm": 0.22973261616101825, "learning_rate": 1.4561203844208396e-05, "loss": 0.2516, "num_tokens": 1237401439.0, "step": 6485 }, { "epoch": 2.2136883427206007, "grad_norm": 0.2260875682696872, "learning_rate": 1.4554881133029843e-05, "loss": 0.2413, "num_tokens": 1237617289.0, "step": 6486 }, { "epoch": 2.2140296979006657, "grad_norm": 0.2360706235952544, "learning_rate": 1.4548558421851292e-05, "loss": 0.2353, "num_tokens": 1237819095.0, "step": 6487 }, { "epoch": 2.2143710530807303, "grad_norm": 0.24150373475097317, "learning_rate": 1.4542235710672735e-05, "loss": 0.2353, "num_tokens": 1238016601.0, "step": 6488 }, { "epoch": 2.2147124082607954, "grad_norm": 0.21319486740781288, "learning_rate": 1.4535912999494184e-05, "loss": 0.2235, "num_tokens": 1238229296.0, "step": 6489 }, { "epoch": 2.21505376344086, "grad_norm": 0.23814507289081485, "learning_rate": 1.4529590288315631e-05, "loss": 0.2429, "num_tokens": 1238424722.0, "step": 6490 }, { "epoch": 2.215395118620925, "grad_norm": 0.2789237107495927, "learning_rate": 1.4523267577137076e-05, "loss": 0.2266, "num_tokens": 1238580332.0, "step": 6491 }, { "epoch": 2.21573647380099, "grad_norm": 0.24787945526736926, "learning_rate": 1.4516944865958523e-05, "loss": 0.2203, "num_tokens": 1238744252.0, "step": 6492 }, { "epoch": 2.2160778289810548, "grad_norm": 0.24166247108943967, "learning_rate": 1.451062215477997e-05, "loss": 0.2227, "num_tokens": 1238943159.0, "step": 6493 }, { "epoch": 2.21641918416112, "grad_norm": 0.2616076701341996, "learning_rate": 1.4504299443601415e-05, "loss": 0.228, "num_tokens": 1239118656.0, "step": 6494 }, { "epoch": 2.2167605393411844, "grad_norm": 0.2320477609446841, "learning_rate": 1.4497976732422864e-05, "loss": 0.2265, "num_tokens": 1239308857.0, "step": 6495 }, { "epoch": 2.2171018945212495, "grad_norm": 0.22160422510302397, "learning_rate": 1.4491654021244311e-05, "loss": 0.2219, "num_tokens": 1239510600.0, "step": 6496 }, { "epoch": 2.217443249701314, "grad_norm": 0.24065260299166327, "learning_rate": 1.4485331310065756e-05, "loss": 0.2295, "num_tokens": 1239697610.0, "step": 6497 }, { "epoch": 2.217784604881379, "grad_norm": 0.25545411798059653, "learning_rate": 1.4479008598887203e-05, "loss": 0.2227, "num_tokens": 1239854094.0, "step": 6498 }, { "epoch": 2.218125960061444, "grad_norm": 0.2234786250376822, "learning_rate": 1.447268588770865e-05, "loss": 0.2287, "num_tokens": 1240049668.0, "step": 6499 }, { "epoch": 2.218467315241509, "grad_norm": 0.23431502996542086, "learning_rate": 1.4466363176530096e-05, "loss": 0.2158, "num_tokens": 1240244228.0, "step": 6500 }, { "epoch": 2.2188086704215735, "grad_norm": 0.22369943116882401, "learning_rate": 1.4460040465351543e-05, "loss": 0.2319, "num_tokens": 1240433146.0, "step": 6501 }, { "epoch": 2.2191500256016385, "grad_norm": 0.24944138911878094, "learning_rate": 1.4453717754172991e-05, "loss": 0.2486, "num_tokens": 1240624334.0, "step": 6502 }, { "epoch": 2.219491380781703, "grad_norm": 0.24625709580970875, "learning_rate": 1.4447395042994435e-05, "loss": 0.2441, "num_tokens": 1240823861.0, "step": 6503 }, { "epoch": 2.219832735961768, "grad_norm": 0.23877302581976076, "learning_rate": 1.4441072331815884e-05, "loss": 0.2384, "num_tokens": 1241006494.0, "step": 6504 }, { "epoch": 2.2201740911418333, "grad_norm": 0.23650357967092844, "learning_rate": 1.443474962063733e-05, "loss": 0.2153, "num_tokens": 1241186142.0, "step": 6505 }, { "epoch": 2.220515446321898, "grad_norm": 0.2177726173171007, "learning_rate": 1.4428426909458776e-05, "loss": 0.21, "num_tokens": 1241367768.0, "step": 6506 }, { "epoch": 2.220856801501963, "grad_norm": 0.21106058944808245, "learning_rate": 1.4422104198280223e-05, "loss": 0.2141, "num_tokens": 1241553311.0, "step": 6507 }, { "epoch": 2.2211981566820276, "grad_norm": 0.23192137770972848, "learning_rate": 1.441578148710167e-05, "loss": 0.2561, "num_tokens": 1241775284.0, "step": 6508 }, { "epoch": 2.2215395118620926, "grad_norm": 0.22937918176109445, "learning_rate": 1.4409458775923115e-05, "loss": 0.2295, "num_tokens": 1241984567.0, "step": 6509 }, { "epoch": 2.2218808670421573, "grad_norm": 0.23964187614196575, "learning_rate": 1.4403136064744564e-05, "loss": 0.2255, "num_tokens": 1242186724.0, "step": 6510 }, { "epoch": 2.2222222222222223, "grad_norm": 0.22799011579044898, "learning_rate": 1.439681335356601e-05, "loss": 0.2477, "num_tokens": 1242401957.0, "step": 6511 }, { "epoch": 2.222563577402287, "grad_norm": 0.24804986836109807, "learning_rate": 1.4390490642387456e-05, "loss": 0.2246, "num_tokens": 1242603331.0, "step": 6512 }, { "epoch": 2.222904932582352, "grad_norm": 0.23391657388727755, "learning_rate": 1.4384167931208903e-05, "loss": 0.2469, "num_tokens": 1242820194.0, "step": 6513 }, { "epoch": 2.2232462877624166, "grad_norm": 0.230510157482452, "learning_rate": 1.437784522003035e-05, "loss": 0.2316, "num_tokens": 1243012136.0, "step": 6514 }, { "epoch": 2.2235876429424817, "grad_norm": 0.23318943534560216, "learning_rate": 1.4371522508851795e-05, "loss": 0.2, "num_tokens": 1243186402.0, "step": 6515 }, { "epoch": 2.2239289981225463, "grad_norm": 0.21840171265848826, "learning_rate": 1.4365199797673242e-05, "loss": 0.2238, "num_tokens": 1243390641.0, "step": 6516 }, { "epoch": 2.2242703533026114, "grad_norm": 0.26278765704270146, "learning_rate": 1.4358877086494691e-05, "loss": 0.2484, "num_tokens": 1243546211.0, "step": 6517 }, { "epoch": 2.2246117084826764, "grad_norm": 0.23544137839411153, "learning_rate": 1.4352554375316135e-05, "loss": 0.2304, "num_tokens": 1243739903.0, "step": 6518 }, { "epoch": 2.224953063662741, "grad_norm": 0.23317075366649265, "learning_rate": 1.4346231664137583e-05, "loss": 0.2281, "num_tokens": 1243921256.0, "step": 6519 }, { "epoch": 2.225294418842806, "grad_norm": 0.23823265941575267, "learning_rate": 1.433990895295903e-05, "loss": 0.2378, "num_tokens": 1244110190.0, "step": 6520 }, { "epoch": 2.2256357740228707, "grad_norm": 0.26544601879577123, "learning_rate": 1.4333586241780475e-05, "loss": 0.2277, "num_tokens": 1244305496.0, "step": 6521 }, { "epoch": 2.225977129202936, "grad_norm": 0.24769171488273814, "learning_rate": 1.4327263530601922e-05, "loss": 0.2437, "num_tokens": 1244495561.0, "step": 6522 }, { "epoch": 2.2263184843830004, "grad_norm": 0.25863374733528743, "learning_rate": 1.432094081942337e-05, "loss": 0.2297, "num_tokens": 1244669608.0, "step": 6523 }, { "epoch": 2.2266598395630655, "grad_norm": 0.24313185992260752, "learning_rate": 1.4314618108244815e-05, "loss": 0.2634, "num_tokens": 1244886822.0, "step": 6524 }, { "epoch": 2.22700119474313, "grad_norm": 0.22860664901561858, "learning_rate": 1.4308295397066262e-05, "loss": 0.239, "num_tokens": 1245086099.0, "step": 6525 }, { "epoch": 2.227342549923195, "grad_norm": 0.22448120578387362, "learning_rate": 1.430197268588771e-05, "loss": 0.2399, "num_tokens": 1245310139.0, "step": 6526 }, { "epoch": 2.2276839051032598, "grad_norm": 0.21325852530349051, "learning_rate": 1.4295649974709156e-05, "loss": 0.2401, "num_tokens": 1245538592.0, "step": 6527 }, { "epoch": 2.228025260283325, "grad_norm": 0.23128769423469475, "learning_rate": 1.4289327263530603e-05, "loss": 0.2267, "num_tokens": 1245736343.0, "step": 6528 }, { "epoch": 2.22836661546339, "grad_norm": 0.2341861401114557, "learning_rate": 1.428300455235205e-05, "loss": 0.2483, "num_tokens": 1245972510.0, "step": 6529 }, { "epoch": 2.2287079706434545, "grad_norm": 0.2524747284038499, "learning_rate": 1.4276681841173495e-05, "loss": 0.2359, "num_tokens": 1246176847.0, "step": 6530 }, { "epoch": 2.2290493258235196, "grad_norm": 0.22629589937144962, "learning_rate": 1.4270359129994942e-05, "loss": 0.2171, "num_tokens": 1246371578.0, "step": 6531 }, { "epoch": 2.229390681003584, "grad_norm": 0.22966801218330024, "learning_rate": 1.426403641881639e-05, "loss": 0.234, "num_tokens": 1246556176.0, "step": 6532 }, { "epoch": 2.2297320361836492, "grad_norm": 0.21572043954793835, "learning_rate": 1.4257713707637834e-05, "loss": 0.2326, "num_tokens": 1246763281.0, "step": 6533 }, { "epoch": 2.230073391363714, "grad_norm": 0.25220278255582085, "learning_rate": 1.4251390996459283e-05, "loss": 0.2426, "num_tokens": 1246934478.0, "step": 6534 }, { "epoch": 2.230414746543779, "grad_norm": 0.2161602041118444, "learning_rate": 1.424506828528073e-05, "loss": 0.2483, "num_tokens": 1247161480.0, "step": 6535 }, { "epoch": 2.2307561017238435, "grad_norm": 0.24710589179724163, "learning_rate": 1.4238745574102175e-05, "loss": 0.2024, "num_tokens": 1247317260.0, "step": 6536 }, { "epoch": 2.2310974569039086, "grad_norm": 0.27026753108408913, "learning_rate": 1.4232422862923622e-05, "loss": 0.2329, "num_tokens": 1247474124.0, "step": 6537 }, { "epoch": 2.231438812083973, "grad_norm": 0.24205630958735494, "learning_rate": 1.4226100151745069e-05, "loss": 0.213, "num_tokens": 1247656763.0, "step": 6538 }, { "epoch": 2.2317801672640383, "grad_norm": 0.26199929286249507, "learning_rate": 1.4219777440566514e-05, "loss": 0.2163, "num_tokens": 1247808575.0, "step": 6539 }, { "epoch": 2.232121522444103, "grad_norm": 0.21145112713350458, "learning_rate": 1.4213454729387961e-05, "loss": 0.2315, "num_tokens": 1248034688.0, "step": 6540 }, { "epoch": 2.232462877624168, "grad_norm": 0.2515549068498122, "learning_rate": 1.420713201820941e-05, "loss": 0.2398, "num_tokens": 1248217492.0, "step": 6541 }, { "epoch": 2.2328042328042326, "grad_norm": 0.2512406991879478, "learning_rate": 1.4200809307030855e-05, "loss": 0.2362, "num_tokens": 1248403341.0, "step": 6542 }, { "epoch": 2.2331455879842976, "grad_norm": 0.2528511449959801, "learning_rate": 1.4194486595852302e-05, "loss": 0.2177, "num_tokens": 1248574793.0, "step": 6543 }, { "epoch": 2.2334869431643627, "grad_norm": 0.24290708017833207, "learning_rate": 1.418816388467375e-05, "loss": 0.2386, "num_tokens": 1248752973.0, "step": 6544 }, { "epoch": 2.2338282983444273, "grad_norm": 0.26917358242603634, "learning_rate": 1.4181841173495195e-05, "loss": 0.2164, "num_tokens": 1248906675.0, "step": 6545 }, { "epoch": 2.2341696535244924, "grad_norm": 0.2625738905641705, "learning_rate": 1.4175518462316642e-05, "loss": 0.2213, "num_tokens": 1249049489.0, "step": 6546 }, { "epoch": 2.234511008704557, "grad_norm": 0.22466890271184942, "learning_rate": 1.416919575113809e-05, "loss": 0.2267, "num_tokens": 1249256896.0, "step": 6547 }, { "epoch": 2.234852363884622, "grad_norm": 0.2539519113859935, "learning_rate": 1.4162873039959534e-05, "loss": 0.1996, "num_tokens": 1249416361.0, "step": 6548 }, { "epoch": 2.2351937190646867, "grad_norm": 0.2349716842786755, "learning_rate": 1.4156550328780983e-05, "loss": 0.2071, "num_tokens": 1249605134.0, "step": 6549 }, { "epoch": 2.2355350742447517, "grad_norm": 0.24180690286732753, "learning_rate": 1.415022761760243e-05, "loss": 0.2521, "num_tokens": 1249786149.0, "step": 6550 }, { "epoch": 2.2358764294248163, "grad_norm": 0.2295656079363202, "learning_rate": 1.4143904906423875e-05, "loss": 0.2344, "num_tokens": 1250008129.0, "step": 6551 }, { "epoch": 2.2362177846048814, "grad_norm": 0.29045018504140585, "learning_rate": 1.4137582195245322e-05, "loss": 0.243, "num_tokens": 1250186379.0, "step": 6552 }, { "epoch": 2.236559139784946, "grad_norm": 0.2711558945402013, "learning_rate": 1.4131259484066769e-05, "loss": 0.2286, "num_tokens": 1250344724.0, "step": 6553 }, { "epoch": 2.236900494965011, "grad_norm": 0.23990859930063238, "learning_rate": 1.4124936772888214e-05, "loss": 0.2312, "num_tokens": 1250550521.0, "step": 6554 }, { "epoch": 2.237241850145076, "grad_norm": 0.2541081738841709, "learning_rate": 1.4118614061709661e-05, "loss": 0.2334, "num_tokens": 1250732554.0, "step": 6555 }, { "epoch": 2.2375832053251408, "grad_norm": 0.24805669657795226, "learning_rate": 1.411229135053111e-05, "loss": 0.2305, "num_tokens": 1250888553.0, "step": 6556 }, { "epoch": 2.237924560505206, "grad_norm": 0.22966865211168055, "learning_rate": 1.4105968639352555e-05, "loss": 0.2431, "num_tokens": 1251123340.0, "step": 6557 }, { "epoch": 2.2382659156852704, "grad_norm": 0.25994688433846896, "learning_rate": 1.4099645928174002e-05, "loss": 0.2299, "num_tokens": 1251272235.0, "step": 6558 }, { "epoch": 2.2386072708653355, "grad_norm": 0.216870500504026, "learning_rate": 1.4093323216995449e-05, "loss": 0.2493, "num_tokens": 1251509051.0, "step": 6559 }, { "epoch": 2.2389486260454, "grad_norm": 0.2540862566392523, "learning_rate": 1.4087000505816894e-05, "loss": 0.1982, "num_tokens": 1251662392.0, "step": 6560 }, { "epoch": 2.239289981225465, "grad_norm": 0.23394359990815716, "learning_rate": 1.4080677794638341e-05, "loss": 0.2528, "num_tokens": 1251874094.0, "step": 6561 }, { "epoch": 2.23963133640553, "grad_norm": 0.2191784109399759, "learning_rate": 1.407435508345979e-05, "loss": 0.2388, "num_tokens": 1252114914.0, "step": 6562 }, { "epoch": 2.239972691585595, "grad_norm": 0.24581968437496737, "learning_rate": 1.4068032372281234e-05, "loss": 0.2241, "num_tokens": 1252307874.0, "step": 6563 }, { "epoch": 2.2403140467656595, "grad_norm": 0.2592663089738113, "learning_rate": 1.4061709661102682e-05, "loss": 0.2388, "num_tokens": 1252499737.0, "step": 6564 }, { "epoch": 2.2406554019457245, "grad_norm": 0.24464417887051848, "learning_rate": 1.405538694992413e-05, "loss": 0.2341, "num_tokens": 1252679902.0, "step": 6565 }, { "epoch": 2.2409967571257896, "grad_norm": 0.2416116868532713, "learning_rate": 1.4049064238745574e-05, "loss": 0.2316, "num_tokens": 1252878203.0, "step": 6566 }, { "epoch": 2.2413381123058542, "grad_norm": 0.25148479346128133, "learning_rate": 1.4042741527567021e-05, "loss": 0.2519, "num_tokens": 1253066030.0, "step": 6567 }, { "epoch": 2.2416794674859193, "grad_norm": 0.2098304105515887, "learning_rate": 1.4036418816388468e-05, "loss": 0.2108, "num_tokens": 1253260830.0, "step": 6568 }, { "epoch": 2.242020822665984, "grad_norm": 0.23791968977996383, "learning_rate": 1.4030096105209914e-05, "loss": 0.2484, "num_tokens": 1253469403.0, "step": 6569 }, { "epoch": 2.242362177846049, "grad_norm": 0.23323159078251698, "learning_rate": 1.402377339403136e-05, "loss": 0.2654, "num_tokens": 1253672580.0, "step": 6570 }, { "epoch": 2.2427035330261136, "grad_norm": 0.23747224392756605, "learning_rate": 1.401745068285281e-05, "loss": 0.2311, "num_tokens": 1253852253.0, "step": 6571 }, { "epoch": 2.2430448882061786, "grad_norm": 0.2500183120918915, "learning_rate": 1.4011127971674255e-05, "loss": 0.2322, "num_tokens": 1254010982.0, "step": 6572 }, { "epoch": 2.2433862433862433, "grad_norm": 0.2361301062122755, "learning_rate": 1.4004805260495702e-05, "loss": 0.2274, "num_tokens": 1254199116.0, "step": 6573 }, { "epoch": 2.2437275985663083, "grad_norm": 0.2493696940314901, "learning_rate": 1.3998482549317149e-05, "loss": 0.2306, "num_tokens": 1254391857.0, "step": 6574 }, { "epoch": 2.244068953746373, "grad_norm": 0.2601487844712346, "learning_rate": 1.3992159838138594e-05, "loss": 0.2157, "num_tokens": 1254557166.0, "step": 6575 }, { "epoch": 2.244410308926438, "grad_norm": 0.232417622219914, "learning_rate": 1.3985837126960041e-05, "loss": 0.2321, "num_tokens": 1254743260.0, "step": 6576 }, { "epoch": 2.2447516641065026, "grad_norm": 0.20184084212299036, "learning_rate": 1.397951441578149e-05, "loss": 0.2356, "num_tokens": 1254980106.0, "step": 6577 }, { "epoch": 2.2450930192865677, "grad_norm": 0.2195108390454014, "learning_rate": 1.3973191704602933e-05, "loss": 0.2335, "num_tokens": 1255196424.0, "step": 6578 }, { "epoch": 2.2454343744666323, "grad_norm": 0.2117262652635981, "learning_rate": 1.3966868993424382e-05, "loss": 0.2291, "num_tokens": 1255424980.0, "step": 6579 }, { "epoch": 2.2457757296466974, "grad_norm": 0.23543837968496167, "learning_rate": 1.3960546282245829e-05, "loss": 0.2089, "num_tokens": 1255581583.0, "step": 6580 }, { "epoch": 2.2461170848267624, "grad_norm": 0.22829238212606745, "learning_rate": 1.3954223571067274e-05, "loss": 0.2489, "num_tokens": 1255808792.0, "step": 6581 }, { "epoch": 2.246458440006827, "grad_norm": 0.23217799037866055, "learning_rate": 1.3947900859888721e-05, "loss": 0.2487, "num_tokens": 1256026990.0, "step": 6582 }, { "epoch": 2.246799795186892, "grad_norm": 0.2300357668688175, "learning_rate": 1.3941578148710168e-05, "loss": 0.2353, "num_tokens": 1256218025.0, "step": 6583 }, { "epoch": 2.2471411503669567, "grad_norm": 0.2641947151061995, "learning_rate": 1.3935255437531613e-05, "loss": 0.2237, "num_tokens": 1256394992.0, "step": 6584 }, { "epoch": 2.247482505547022, "grad_norm": 0.2530780336004699, "learning_rate": 1.392893272635306e-05, "loss": 0.2278, "num_tokens": 1256560022.0, "step": 6585 }, { "epoch": 2.2478238607270864, "grad_norm": 0.2362561020667252, "learning_rate": 1.3922610015174509e-05, "loss": 0.2345, "num_tokens": 1256740828.0, "step": 6586 }, { "epoch": 2.2481652159071515, "grad_norm": 0.22888024071896146, "learning_rate": 1.3916287303995954e-05, "loss": 0.2459, "num_tokens": 1256964327.0, "step": 6587 }, { "epoch": 2.248506571087216, "grad_norm": 0.3475103143188756, "learning_rate": 1.3909964592817401e-05, "loss": 0.2152, "num_tokens": 1257137861.0, "step": 6588 }, { "epoch": 2.248847926267281, "grad_norm": 0.25319231945529597, "learning_rate": 1.3903641881638848e-05, "loss": 0.2323, "num_tokens": 1257314863.0, "step": 6589 }, { "epoch": 2.2491892814473458, "grad_norm": 0.22246242153600485, "learning_rate": 1.3897319170460294e-05, "loss": 0.2267, "num_tokens": 1257525270.0, "step": 6590 }, { "epoch": 2.249530636627411, "grad_norm": 0.24478788734367424, "learning_rate": 1.389099645928174e-05, "loss": 0.2382, "num_tokens": 1257725545.0, "step": 6591 }, { "epoch": 2.249871991807476, "grad_norm": 0.23340757927738828, "learning_rate": 1.388467374810319e-05, "loss": 0.232, "num_tokens": 1257920184.0, "step": 6592 }, { "epoch": 2.2502133469875405, "grad_norm": 0.22367572122107388, "learning_rate": 1.3878351036924633e-05, "loss": 0.2111, "num_tokens": 1258114812.0, "step": 6593 }, { "epoch": 2.2505547021676056, "grad_norm": 0.24998441898776624, "learning_rate": 1.3872028325746082e-05, "loss": 0.2435, "num_tokens": 1258281739.0, "step": 6594 }, { "epoch": 2.25089605734767, "grad_norm": 0.251625217530114, "learning_rate": 1.3865705614567528e-05, "loss": 0.2385, "num_tokens": 1258477488.0, "step": 6595 }, { "epoch": 2.2512374125277352, "grad_norm": 0.25094353374459083, "learning_rate": 1.3859382903388974e-05, "loss": 0.2651, "num_tokens": 1258700136.0, "step": 6596 }, { "epoch": 2.2515787677078, "grad_norm": 0.25585660364857593, "learning_rate": 1.385306019221042e-05, "loss": 0.2182, "num_tokens": 1258870182.0, "step": 6597 }, { "epoch": 2.251920122887865, "grad_norm": 0.23082125525380184, "learning_rate": 1.3846737481031868e-05, "loss": 0.2321, "num_tokens": 1259052370.0, "step": 6598 }, { "epoch": 2.2522614780679295, "grad_norm": 0.21716676023938622, "learning_rate": 1.3840414769853313e-05, "loss": 0.224, "num_tokens": 1259292031.0, "step": 6599 }, { "epoch": 2.2526028332479946, "grad_norm": 0.2498856561663003, "learning_rate": 1.383409205867476e-05, "loss": 0.2522, "num_tokens": 1259480470.0, "step": 6600 }, { "epoch": 2.252944188428059, "grad_norm": 0.2366002533452037, "learning_rate": 1.3827769347496209e-05, "loss": 0.2191, "num_tokens": 1259656387.0, "step": 6601 }, { "epoch": 2.2532855436081243, "grad_norm": 0.22507185377069958, "learning_rate": 1.3821446636317652e-05, "loss": 0.2527, "num_tokens": 1259855911.0, "step": 6602 }, { "epoch": 2.2536268987881893, "grad_norm": 0.2364735824714003, "learning_rate": 1.3815123925139101e-05, "loss": 0.2354, "num_tokens": 1260054201.0, "step": 6603 }, { "epoch": 2.253968253968254, "grad_norm": 0.2314962327957996, "learning_rate": 1.3808801213960548e-05, "loss": 0.2114, "num_tokens": 1260259763.0, "step": 6604 }, { "epoch": 2.254309609148319, "grad_norm": 0.2557020013235151, "learning_rate": 1.3802478502781993e-05, "loss": 0.2201, "num_tokens": 1260422677.0, "step": 6605 }, { "epoch": 2.2546509643283836, "grad_norm": 0.2446684076075414, "learning_rate": 1.379615579160344e-05, "loss": 0.227, "num_tokens": 1260599527.0, "step": 6606 }, { "epoch": 2.2549923195084487, "grad_norm": 0.23405644684482652, "learning_rate": 1.3789833080424889e-05, "loss": 0.2232, "num_tokens": 1260801721.0, "step": 6607 }, { "epoch": 2.2553336746885133, "grad_norm": 0.25915153963621257, "learning_rate": 1.3783510369246332e-05, "loss": 0.2298, "num_tokens": 1260972911.0, "step": 6608 }, { "epoch": 2.2556750298685784, "grad_norm": 0.25792439531179207, "learning_rate": 1.3777187658067781e-05, "loss": 0.2409, "num_tokens": 1261146378.0, "step": 6609 }, { "epoch": 2.256016385048643, "grad_norm": 0.2444619788547799, "learning_rate": 1.3770864946889228e-05, "loss": 0.223, "num_tokens": 1261314903.0, "step": 6610 }, { "epoch": 2.256357740228708, "grad_norm": 0.2360448078831866, "learning_rate": 1.3764542235710673e-05, "loss": 0.2375, "num_tokens": 1261518975.0, "step": 6611 }, { "epoch": 2.2566990954087727, "grad_norm": 0.24100204227525376, "learning_rate": 1.375821952453212e-05, "loss": 0.2252, "num_tokens": 1261716176.0, "step": 6612 }, { "epoch": 2.2570404505888377, "grad_norm": 0.2635381340473525, "learning_rate": 1.3751896813353567e-05, "loss": 0.2506, "num_tokens": 1261904957.0, "step": 6613 }, { "epoch": 2.2573818057689023, "grad_norm": 0.23986729191217127, "learning_rate": 1.3745574102175013e-05, "loss": 0.2352, "num_tokens": 1262116535.0, "step": 6614 }, { "epoch": 2.2577231609489674, "grad_norm": 0.2214830330395237, "learning_rate": 1.373925139099646e-05, "loss": 0.236, "num_tokens": 1262334291.0, "step": 6615 }, { "epoch": 2.258064516129032, "grad_norm": 0.23681125505362524, "learning_rate": 1.3732928679817905e-05, "loss": 0.231, "num_tokens": 1262515695.0, "step": 6616 }, { "epoch": 2.258405871309097, "grad_norm": 0.24474187353243387, "learning_rate": 1.3726605968639352e-05, "loss": 0.225, "num_tokens": 1262695379.0, "step": 6617 }, { "epoch": 2.258747226489162, "grad_norm": 0.2551526019703138, "learning_rate": 1.37202832574608e-05, "loss": 0.2509, "num_tokens": 1262878055.0, "step": 6618 }, { "epoch": 2.2590885816692268, "grad_norm": 0.2510815287573602, "learning_rate": 1.3713960546282246e-05, "loss": 0.2464, "num_tokens": 1263063403.0, "step": 6619 }, { "epoch": 2.259429936849292, "grad_norm": 0.24407757043543252, "learning_rate": 1.3707637835103693e-05, "loss": 0.2392, "num_tokens": 1263251090.0, "step": 6620 }, { "epoch": 2.2597712920293564, "grad_norm": 0.23773604116926267, "learning_rate": 1.370131512392514e-05, "loss": 0.2123, "num_tokens": 1263436199.0, "step": 6621 }, { "epoch": 2.2601126472094215, "grad_norm": 0.24423539483498105, "learning_rate": 1.3694992412746585e-05, "loss": 0.2094, "num_tokens": 1263604660.0, "step": 6622 }, { "epoch": 2.260454002389486, "grad_norm": 0.22322250943596708, "learning_rate": 1.3688669701568032e-05, "loss": 0.2311, "num_tokens": 1263806538.0, "step": 6623 }, { "epoch": 2.260795357569551, "grad_norm": 0.22446855825888204, "learning_rate": 1.368234699038948e-05, "loss": 0.2239, "num_tokens": 1264013568.0, "step": 6624 }, { "epoch": 2.261136712749616, "grad_norm": 0.23395015488759335, "learning_rate": 1.3676024279210924e-05, "loss": 0.2255, "num_tokens": 1264191764.0, "step": 6625 }, { "epoch": 2.261478067929681, "grad_norm": 0.23974912741391377, "learning_rate": 1.3669701568032373e-05, "loss": 0.2223, "num_tokens": 1264383903.0, "step": 6626 }, { "epoch": 2.2618194231097455, "grad_norm": 0.24090255358255622, "learning_rate": 1.366337885685382e-05, "loss": 0.2381, "num_tokens": 1264593297.0, "step": 6627 }, { "epoch": 2.2621607782898105, "grad_norm": 0.24180049627184602, "learning_rate": 1.3657056145675265e-05, "loss": 0.2169, "num_tokens": 1264771587.0, "step": 6628 }, { "epoch": 2.2625021334698756, "grad_norm": 0.23586481178138383, "learning_rate": 1.3650733434496712e-05, "loss": 0.225, "num_tokens": 1264944686.0, "step": 6629 }, { "epoch": 2.2628434886499402, "grad_norm": 0.21993296459871747, "learning_rate": 1.364441072331816e-05, "loss": 0.2109, "num_tokens": 1265134996.0, "step": 6630 }, { "epoch": 2.2631848438300053, "grad_norm": 0.28289207268231154, "learning_rate": 1.3638088012139605e-05, "loss": 0.2412, "num_tokens": 1265294934.0, "step": 6631 }, { "epoch": 2.26352619901007, "grad_norm": 0.22977242047703192, "learning_rate": 1.3631765300961052e-05, "loss": 0.2358, "num_tokens": 1265503754.0, "step": 6632 }, { "epoch": 2.263867554190135, "grad_norm": 0.2311572757362991, "learning_rate": 1.36254425897825e-05, "loss": 0.2374, "num_tokens": 1265703597.0, "step": 6633 }, { "epoch": 2.2642089093701996, "grad_norm": 0.2104426411182919, "learning_rate": 1.3619119878603946e-05, "loss": 0.236, "num_tokens": 1265956084.0, "step": 6634 }, { "epoch": 2.2645502645502646, "grad_norm": 0.27127334042992735, "learning_rate": 1.3612797167425393e-05, "loss": 0.2038, "num_tokens": 1266102063.0, "step": 6635 }, { "epoch": 2.2648916197303293, "grad_norm": 0.23651567835660084, "learning_rate": 1.360647445624684e-05, "loss": 0.237, "num_tokens": 1266271951.0, "step": 6636 }, { "epoch": 2.2652329749103943, "grad_norm": 0.2368368589018876, "learning_rate": 1.3600151745068285e-05, "loss": 0.2394, "num_tokens": 1266462377.0, "step": 6637 }, { "epoch": 2.265574330090459, "grad_norm": 0.2243548893592643, "learning_rate": 1.3593829033889732e-05, "loss": 0.2371, "num_tokens": 1266669610.0, "step": 6638 }, { "epoch": 2.265915685270524, "grad_norm": 0.23061380734877962, "learning_rate": 1.358750632271118e-05, "loss": 0.2211, "num_tokens": 1266878517.0, "step": 6639 }, { "epoch": 2.266257040450589, "grad_norm": 3.695330803226034, "learning_rate": 1.3581183611532624e-05, "loss": 0.2811, "num_tokens": 1267083328.0, "step": 6640 }, { "epoch": 2.2665983956306537, "grad_norm": 0.2193532609054292, "learning_rate": 1.3574860900354073e-05, "loss": 0.2265, "num_tokens": 1267287820.0, "step": 6641 }, { "epoch": 2.2669397508107187, "grad_norm": 0.2424146127393489, "learning_rate": 1.356853818917552e-05, "loss": 0.2233, "num_tokens": 1267462387.0, "step": 6642 }, { "epoch": 2.2672811059907834, "grad_norm": 0.23163005714564378, "learning_rate": 1.3562215477996965e-05, "loss": 0.2444, "num_tokens": 1267670703.0, "step": 6643 }, { "epoch": 2.2676224611708484, "grad_norm": 0.26486770917670704, "learning_rate": 1.3555892766818412e-05, "loss": 0.2244, "num_tokens": 1267820310.0, "step": 6644 }, { "epoch": 2.267963816350913, "grad_norm": 0.2426046176602145, "learning_rate": 1.3549570055639859e-05, "loss": 0.2139, "num_tokens": 1268011775.0, "step": 6645 }, { "epoch": 2.268305171530978, "grad_norm": 0.36540982165234853, "learning_rate": 1.3543247344461304e-05, "loss": 0.2254, "num_tokens": 1268226715.0, "step": 6646 }, { "epoch": 2.2686465267110427, "grad_norm": 0.23290214055120737, "learning_rate": 1.3536924633282751e-05, "loss": 0.2335, "num_tokens": 1268389712.0, "step": 6647 }, { "epoch": 2.268987881891108, "grad_norm": 0.24280686526959278, "learning_rate": 1.35306019221042e-05, "loss": 0.2033, "num_tokens": 1268569862.0, "step": 6648 }, { "epoch": 2.2693292370711724, "grad_norm": 0.24693228854280583, "learning_rate": 1.3524279210925645e-05, "loss": 0.2385, "num_tokens": 1268759120.0, "step": 6649 }, { "epoch": 2.2696705922512375, "grad_norm": 0.24506489136882373, "learning_rate": 1.3517956499747092e-05, "loss": 0.2258, "num_tokens": 1268922906.0, "step": 6650 }, { "epoch": 2.270011947431302, "grad_norm": 0.2692465366470356, "learning_rate": 1.351163378856854e-05, "loss": 0.2165, "num_tokens": 1269059657.0, "step": 6651 }, { "epoch": 2.270353302611367, "grad_norm": 0.26413032195088054, "learning_rate": 1.3505311077389985e-05, "loss": 0.232, "num_tokens": 1269229468.0, "step": 6652 }, { "epoch": 2.2706946577914318, "grad_norm": 0.22646575436315006, "learning_rate": 1.3498988366211431e-05, "loss": 0.2261, "num_tokens": 1269428863.0, "step": 6653 }, { "epoch": 2.271036012971497, "grad_norm": 0.2379305535967232, "learning_rate": 1.349266565503288e-05, "loss": 0.2464, "num_tokens": 1269643935.0, "step": 6654 }, { "epoch": 2.271377368151562, "grad_norm": 0.22287846448375154, "learning_rate": 1.3486342943854324e-05, "loss": 0.1993, "num_tokens": 1269832951.0, "step": 6655 }, { "epoch": 2.2717187233316265, "grad_norm": 0.24076714662378224, "learning_rate": 1.3480020232675772e-05, "loss": 0.243, "num_tokens": 1270034190.0, "step": 6656 }, { "epoch": 2.2720600785116916, "grad_norm": 0.2519231814898827, "learning_rate": 1.347369752149722e-05, "loss": 0.2335, "num_tokens": 1270215829.0, "step": 6657 }, { "epoch": 2.272401433691756, "grad_norm": 0.2433448802059206, "learning_rate": 1.3467374810318665e-05, "loss": 0.2155, "num_tokens": 1270387150.0, "step": 6658 }, { "epoch": 2.2727427888718212, "grad_norm": 0.23470929760529458, "learning_rate": 1.3461052099140112e-05, "loss": 0.2234, "num_tokens": 1270582762.0, "step": 6659 }, { "epoch": 2.273084144051886, "grad_norm": 0.21561343037936287, "learning_rate": 1.3454729387961559e-05, "loss": 0.2279, "num_tokens": 1270794506.0, "step": 6660 }, { "epoch": 2.273425499231951, "grad_norm": 0.21941223962144893, "learning_rate": 1.3448406676783004e-05, "loss": 0.216, "num_tokens": 1271012506.0, "step": 6661 }, { "epoch": 2.2737668544120155, "grad_norm": 0.22392306889023184, "learning_rate": 1.3442083965604451e-05, "loss": 0.2269, "num_tokens": 1271214730.0, "step": 6662 }, { "epoch": 2.2741082095920806, "grad_norm": 0.2284744034390865, "learning_rate": 1.34357612544259e-05, "loss": 0.2245, "num_tokens": 1271399833.0, "step": 6663 }, { "epoch": 2.274449564772145, "grad_norm": 0.22656413895696603, "learning_rate": 1.3429438543247345e-05, "loss": 0.2165, "num_tokens": 1271598207.0, "step": 6664 }, { "epoch": 2.2747909199522103, "grad_norm": 0.21998149922572555, "learning_rate": 1.3423115832068792e-05, "loss": 0.2205, "num_tokens": 1271794620.0, "step": 6665 }, { "epoch": 2.2751322751322753, "grad_norm": 0.24590764669862586, "learning_rate": 1.3416793120890239e-05, "loss": 0.2249, "num_tokens": 1271976535.0, "step": 6666 }, { "epoch": 2.27547363031234, "grad_norm": 0.2485397649118836, "learning_rate": 1.3410470409711684e-05, "loss": 0.2477, "num_tokens": 1272159232.0, "step": 6667 }, { "epoch": 2.275814985492405, "grad_norm": 0.21249779262319832, "learning_rate": 1.3404147698533131e-05, "loss": 0.2069, "num_tokens": 1272350693.0, "step": 6668 }, { "epoch": 2.2761563406724696, "grad_norm": 0.23371727393183792, "learning_rate": 1.339782498735458e-05, "loss": 0.2484, "num_tokens": 1272549783.0, "step": 6669 }, { "epoch": 2.2764976958525347, "grad_norm": 0.20824798323226548, "learning_rate": 1.3391502276176023e-05, "loss": 0.2329, "num_tokens": 1272775883.0, "step": 6670 }, { "epoch": 2.2768390510325993, "grad_norm": 0.23347047298885087, "learning_rate": 1.3385179564997472e-05, "loss": 0.2218, "num_tokens": 1272973552.0, "step": 6671 }, { "epoch": 2.2771804062126644, "grad_norm": 0.21546360466620953, "learning_rate": 1.3378856853818919e-05, "loss": 0.2563, "num_tokens": 1273227260.0, "step": 6672 }, { "epoch": 2.277521761392729, "grad_norm": 0.22877795443100674, "learning_rate": 1.3372534142640364e-05, "loss": 0.2418, "num_tokens": 1273429198.0, "step": 6673 }, { "epoch": 2.277863116572794, "grad_norm": 0.23594617266805354, "learning_rate": 1.3366211431461811e-05, "loss": 0.21, "num_tokens": 1273609491.0, "step": 6674 }, { "epoch": 2.2782044717528587, "grad_norm": 0.25528575196182896, "learning_rate": 1.3359888720283258e-05, "loss": 0.2277, "num_tokens": 1273775152.0, "step": 6675 }, { "epoch": 2.2785458269329237, "grad_norm": 0.25269746391427783, "learning_rate": 1.3353566009104704e-05, "loss": 0.2278, "num_tokens": 1273946422.0, "step": 6676 }, { "epoch": 2.278887182112989, "grad_norm": 0.23054509365994377, "learning_rate": 1.334724329792615e-05, "loss": 0.2326, "num_tokens": 1274132937.0, "step": 6677 }, { "epoch": 2.2792285372930534, "grad_norm": 0.2618109865308967, "learning_rate": 1.33409205867476e-05, "loss": 0.2256, "num_tokens": 1274302749.0, "step": 6678 }, { "epoch": 2.279569892473118, "grad_norm": 0.22553776695717892, "learning_rate": 1.3334597875569043e-05, "loss": 0.2221, "num_tokens": 1274509134.0, "step": 6679 }, { "epoch": 2.279911247653183, "grad_norm": 0.20729874353678526, "learning_rate": 1.3328275164390492e-05, "loss": 0.2046, "num_tokens": 1274704487.0, "step": 6680 }, { "epoch": 2.280252602833248, "grad_norm": 0.24620245001149807, "learning_rate": 1.3321952453211939e-05, "loss": 0.2341, "num_tokens": 1274876847.0, "step": 6681 }, { "epoch": 2.2805939580133128, "grad_norm": 0.22796075255461684, "learning_rate": 1.3315629742033384e-05, "loss": 0.2412, "num_tokens": 1275092746.0, "step": 6682 }, { "epoch": 2.280935313193378, "grad_norm": 0.2837262457353377, "learning_rate": 1.330930703085483e-05, "loss": 0.208, "num_tokens": 1275224920.0, "step": 6683 }, { "epoch": 2.2812766683734425, "grad_norm": 0.2635541138241494, "learning_rate": 1.330298431967628e-05, "loss": 0.2191, "num_tokens": 1275385689.0, "step": 6684 }, { "epoch": 2.2816180235535075, "grad_norm": 0.24517227205105432, "learning_rate": 1.3296661608497723e-05, "loss": 0.2519, "num_tokens": 1275577634.0, "step": 6685 }, { "epoch": 2.281959378733572, "grad_norm": 0.21172159811791064, "learning_rate": 1.3290338897319172e-05, "loss": 0.2409, "num_tokens": 1275806192.0, "step": 6686 }, { "epoch": 2.282300733913637, "grad_norm": 0.22055205291446336, "learning_rate": 1.3284016186140619e-05, "loss": 0.2492, "num_tokens": 1276030549.0, "step": 6687 }, { "epoch": 2.282642089093702, "grad_norm": 0.2825869082338628, "learning_rate": 1.3277693474962064e-05, "loss": 0.2419, "num_tokens": 1276239957.0, "step": 6688 }, { "epoch": 2.282983444273767, "grad_norm": 0.2287040680332004, "learning_rate": 1.3271370763783511e-05, "loss": 0.2474, "num_tokens": 1276450206.0, "step": 6689 }, { "epoch": 2.2833247994538315, "grad_norm": 0.2367735074592898, "learning_rate": 1.3265048052604958e-05, "loss": 0.2287, "num_tokens": 1276632421.0, "step": 6690 }, { "epoch": 2.2836661546338966, "grad_norm": 0.23739435414449206, "learning_rate": 1.3258725341426403e-05, "loss": 0.2407, "num_tokens": 1276818699.0, "step": 6691 }, { "epoch": 2.2840075098139616, "grad_norm": 0.23447684612824654, "learning_rate": 1.325240263024785e-05, "loss": 0.2453, "num_tokens": 1277022054.0, "step": 6692 }, { "epoch": 2.2843488649940262, "grad_norm": 0.24218607248720922, "learning_rate": 1.3246079919069299e-05, "loss": 0.2276, "num_tokens": 1277202701.0, "step": 6693 }, { "epoch": 2.2846902201740913, "grad_norm": 0.23521614101811275, "learning_rate": 1.3239757207890743e-05, "loss": 0.2385, "num_tokens": 1277400027.0, "step": 6694 }, { "epoch": 2.285031575354156, "grad_norm": 0.24838145534336728, "learning_rate": 1.3233434496712191e-05, "loss": 0.2395, "num_tokens": 1277582191.0, "step": 6695 }, { "epoch": 2.285372930534221, "grad_norm": 0.2623150363954273, "learning_rate": 1.3227111785533638e-05, "loss": 0.2147, "num_tokens": 1277737389.0, "step": 6696 }, { "epoch": 2.2857142857142856, "grad_norm": 0.2403855875689026, "learning_rate": 1.3220789074355083e-05, "loss": 0.2302, "num_tokens": 1277947823.0, "step": 6697 }, { "epoch": 2.2860556408943506, "grad_norm": 0.22087467860269205, "learning_rate": 1.321446636317653e-05, "loss": 0.2266, "num_tokens": 1278169219.0, "step": 6698 }, { "epoch": 2.2863969960744153, "grad_norm": 0.2221964708396703, "learning_rate": 1.320814365199798e-05, "loss": 0.2108, "num_tokens": 1278370169.0, "step": 6699 }, { "epoch": 2.2867383512544803, "grad_norm": 0.2294672689575956, "learning_rate": 1.3201820940819423e-05, "loss": 0.2252, "num_tokens": 1278580835.0, "step": 6700 }, { "epoch": 2.287079706434545, "grad_norm": 0.2606598635862517, "learning_rate": 1.3195498229640871e-05, "loss": 0.2312, "num_tokens": 1278747660.0, "step": 6701 }, { "epoch": 2.28742106161461, "grad_norm": 0.23735768300738266, "learning_rate": 1.3189175518462318e-05, "loss": 0.2273, "num_tokens": 1278933915.0, "step": 6702 }, { "epoch": 2.287762416794675, "grad_norm": 0.22803286481705742, "learning_rate": 1.3182852807283764e-05, "loss": 0.2345, "num_tokens": 1279120455.0, "step": 6703 }, { "epoch": 2.2881037719747397, "grad_norm": 0.23415876784402365, "learning_rate": 1.317653009610521e-05, "loss": 0.2315, "num_tokens": 1279307874.0, "step": 6704 }, { "epoch": 2.2884451271548047, "grad_norm": 0.23219604229626425, "learning_rate": 1.3170207384926658e-05, "loss": 0.2255, "num_tokens": 1279495661.0, "step": 6705 }, { "epoch": 2.2887864823348694, "grad_norm": 0.23900424120688435, "learning_rate": 1.3163884673748103e-05, "loss": 0.2237, "num_tokens": 1279684481.0, "step": 6706 }, { "epoch": 2.2891278375149344, "grad_norm": 0.23414855791525782, "learning_rate": 1.315756196256955e-05, "loss": 0.2111, "num_tokens": 1279881492.0, "step": 6707 }, { "epoch": 2.289469192694999, "grad_norm": 0.2504981033842688, "learning_rate": 1.3151239251390999e-05, "loss": 0.245, "num_tokens": 1280072301.0, "step": 6708 }, { "epoch": 2.289810547875064, "grad_norm": 0.2518528761065228, "learning_rate": 1.3144916540212442e-05, "loss": 0.2185, "num_tokens": 1280221992.0, "step": 6709 }, { "epoch": 2.2901519030551287, "grad_norm": 0.24330333270420842, "learning_rate": 1.3138593829033891e-05, "loss": 0.2458, "num_tokens": 1280424798.0, "step": 6710 }, { "epoch": 2.290493258235194, "grad_norm": 0.24491060042129936, "learning_rate": 1.3132271117855338e-05, "loss": 0.2328, "num_tokens": 1280634864.0, "step": 6711 }, { "epoch": 2.2908346134152584, "grad_norm": 0.23315811193087493, "learning_rate": 1.3125948406676783e-05, "loss": 0.2086, "num_tokens": 1280812640.0, "step": 6712 }, { "epoch": 2.2911759685953235, "grad_norm": 0.23976837372987028, "learning_rate": 1.311962569549823e-05, "loss": 0.2413, "num_tokens": 1280991489.0, "step": 6713 }, { "epoch": 2.2915173237753885, "grad_norm": 0.48472632490566653, "learning_rate": 1.3113302984319679e-05, "loss": 0.2247, "num_tokens": 1281189913.0, "step": 6714 }, { "epoch": 2.291858678955453, "grad_norm": 0.2339563594233088, "learning_rate": 1.3106980273141122e-05, "loss": 0.2101, "num_tokens": 1281370759.0, "step": 6715 }, { "epoch": 2.2922000341355178, "grad_norm": 0.2256852227288855, "learning_rate": 1.3100657561962571e-05, "loss": 0.259, "num_tokens": 1281606146.0, "step": 6716 }, { "epoch": 2.292541389315583, "grad_norm": 0.21684948957290553, "learning_rate": 1.3094334850784018e-05, "loss": 0.2245, "num_tokens": 1281830094.0, "step": 6717 }, { "epoch": 2.292882744495648, "grad_norm": 0.24154863821438224, "learning_rate": 1.3088012139605463e-05, "loss": 0.2386, "num_tokens": 1282013874.0, "step": 6718 }, { "epoch": 2.2932240996757125, "grad_norm": 0.20934410373448578, "learning_rate": 1.308168942842691e-05, "loss": 0.2555, "num_tokens": 1282257394.0, "step": 6719 }, { "epoch": 2.2935654548557776, "grad_norm": 0.23688063506406892, "learning_rate": 1.3075366717248357e-05, "loss": 0.2193, "num_tokens": 1282425245.0, "step": 6720 }, { "epoch": 2.293906810035842, "grad_norm": 0.2494141596209731, "learning_rate": 1.3069044006069803e-05, "loss": 0.2091, "num_tokens": 1282591243.0, "step": 6721 }, { "epoch": 2.2942481652159072, "grad_norm": 0.24379660569720402, "learning_rate": 1.306272129489125e-05, "loss": 0.2245, "num_tokens": 1282769449.0, "step": 6722 }, { "epoch": 2.294589520395972, "grad_norm": 0.23050037254314754, "learning_rate": 1.3056398583712698e-05, "loss": 0.2487, "num_tokens": 1282982812.0, "step": 6723 }, { "epoch": 2.294930875576037, "grad_norm": 0.24797778214214547, "learning_rate": 1.3050075872534142e-05, "loss": 0.266, "num_tokens": 1283197572.0, "step": 6724 }, { "epoch": 2.2952722307561015, "grad_norm": 0.24239471630477447, "learning_rate": 1.304375316135559e-05, "loss": 0.2086, "num_tokens": 1283360076.0, "step": 6725 }, { "epoch": 2.2956135859361666, "grad_norm": 0.24288354066592932, "learning_rate": 1.3037430450177038e-05, "loss": 0.2521, "num_tokens": 1283567391.0, "step": 6726 }, { "epoch": 2.295954941116231, "grad_norm": 0.267849056636356, "learning_rate": 1.3031107738998483e-05, "loss": 0.2409, "num_tokens": 1283746342.0, "step": 6727 }, { "epoch": 2.2962962962962963, "grad_norm": 0.2176583207908161, "learning_rate": 1.302478502781993e-05, "loss": 0.2352, "num_tokens": 1283937950.0, "step": 6728 }, { "epoch": 2.2966376514763613, "grad_norm": 0.23238468090928543, "learning_rate": 1.3018462316641378e-05, "loss": 0.2268, "num_tokens": 1284160397.0, "step": 6729 }, { "epoch": 2.296979006656426, "grad_norm": 0.24698782148226273, "learning_rate": 1.3012139605462822e-05, "loss": 0.2274, "num_tokens": 1284351029.0, "step": 6730 }, { "epoch": 2.297320361836491, "grad_norm": 0.22243466845632692, "learning_rate": 1.300581689428427e-05, "loss": 0.2413, "num_tokens": 1284564576.0, "step": 6731 }, { "epoch": 2.2976617170165556, "grad_norm": 0.2409022656356351, "learning_rate": 1.2999494183105718e-05, "loss": 0.2104, "num_tokens": 1284732394.0, "step": 6732 }, { "epoch": 2.2980030721966207, "grad_norm": 0.2470622228774713, "learning_rate": 1.2993171471927163e-05, "loss": 0.2259, "num_tokens": 1284916245.0, "step": 6733 }, { "epoch": 2.2983444273766853, "grad_norm": 0.28681322846390944, "learning_rate": 1.298684876074861e-05, "loss": 0.2246, "num_tokens": 1285115690.0, "step": 6734 }, { "epoch": 2.2986857825567504, "grad_norm": 0.23734859354452742, "learning_rate": 1.2980526049570057e-05, "loss": 0.2217, "num_tokens": 1285324254.0, "step": 6735 }, { "epoch": 2.299027137736815, "grad_norm": 0.20878046240344297, "learning_rate": 1.2974203338391502e-05, "loss": 0.2305, "num_tokens": 1285560931.0, "step": 6736 }, { "epoch": 2.29936849291688, "grad_norm": 0.24813464637324678, "learning_rate": 1.296788062721295e-05, "loss": 0.2054, "num_tokens": 1285705207.0, "step": 6737 }, { "epoch": 2.2997098480969447, "grad_norm": 0.24052108422945595, "learning_rate": 1.2961557916034398e-05, "loss": 0.2157, "num_tokens": 1285892656.0, "step": 6738 }, { "epoch": 2.3000512032770097, "grad_norm": 0.23732229956557993, "learning_rate": 1.2955235204855842e-05, "loss": 0.2371, "num_tokens": 1286069684.0, "step": 6739 }, { "epoch": 2.300392558457075, "grad_norm": 0.2550879236485114, "learning_rate": 1.294891249367729e-05, "loss": 0.2412, "num_tokens": 1286276246.0, "step": 6740 }, { "epoch": 2.3007339136371394, "grad_norm": 0.22355851291025394, "learning_rate": 1.2942589782498737e-05, "loss": 0.2321, "num_tokens": 1286484848.0, "step": 6741 }, { "epoch": 2.3010752688172045, "grad_norm": 0.24291707964193807, "learning_rate": 1.2936267071320182e-05, "loss": 0.2342, "num_tokens": 1286683843.0, "step": 6742 }, { "epoch": 2.301416623997269, "grad_norm": 0.2301331347924623, "learning_rate": 1.292994436014163e-05, "loss": 0.2249, "num_tokens": 1286871615.0, "step": 6743 }, { "epoch": 2.301757979177334, "grad_norm": 0.24563307718813646, "learning_rate": 1.2923621648963078e-05, "loss": 0.224, "num_tokens": 1287042036.0, "step": 6744 }, { "epoch": 2.3020993343573988, "grad_norm": 0.23301725848679658, "learning_rate": 1.2917298937784522e-05, "loss": 0.2423, "num_tokens": 1287241361.0, "step": 6745 }, { "epoch": 2.302440689537464, "grad_norm": 0.2202356208563215, "learning_rate": 1.291097622660597e-05, "loss": 0.2184, "num_tokens": 1287440174.0, "step": 6746 }, { "epoch": 2.3027820447175285, "grad_norm": 0.2276755363516013, "learning_rate": 1.2904653515427414e-05, "loss": 0.2108, "num_tokens": 1287638046.0, "step": 6747 }, { "epoch": 2.3031233998975935, "grad_norm": 0.2734841409634801, "learning_rate": 1.2898330804248863e-05, "loss": 0.2339, "num_tokens": 1287795023.0, "step": 6748 }, { "epoch": 2.303464755077658, "grad_norm": 0.2668064748875653, "learning_rate": 1.289200809307031e-05, "loss": 0.2381, "num_tokens": 1288016093.0, "step": 6749 }, { "epoch": 2.303806110257723, "grad_norm": 0.23233841599413477, "learning_rate": 1.2885685381891755e-05, "loss": 0.2403, "num_tokens": 1288197481.0, "step": 6750 }, { "epoch": 2.3041474654377883, "grad_norm": 0.2415844366642552, "learning_rate": 1.2879362670713202e-05, "loss": 0.25, "num_tokens": 1288426384.0, "step": 6751 }, { "epoch": 2.304488820617853, "grad_norm": 0.22862096365151716, "learning_rate": 1.2873039959534649e-05, "loss": 0.2209, "num_tokens": 1288620191.0, "step": 6752 }, { "epoch": 2.3048301757979175, "grad_norm": 0.25783801658924366, "learning_rate": 1.2866717248356094e-05, "loss": 0.2048, "num_tokens": 1288790602.0, "step": 6753 }, { "epoch": 2.3051715309779826, "grad_norm": 0.23544494260807564, "learning_rate": 1.2860394537177541e-05, "loss": 0.2409, "num_tokens": 1288999384.0, "step": 6754 }, { "epoch": 2.3055128861580476, "grad_norm": 0.27618967334819927, "learning_rate": 1.285407182599899e-05, "loss": 0.2548, "num_tokens": 1289180061.0, "step": 6755 }, { "epoch": 2.3058542413381122, "grad_norm": 0.2350417343334855, "learning_rate": 1.2847749114820433e-05, "loss": 0.2422, "num_tokens": 1289378473.0, "step": 6756 }, { "epoch": 2.3061955965181773, "grad_norm": 0.25890759711177397, "learning_rate": 1.2841426403641882e-05, "loss": 0.2063, "num_tokens": 1289517064.0, "step": 6757 }, { "epoch": 2.306536951698242, "grad_norm": 0.23908611880837946, "learning_rate": 1.2835103692463329e-05, "loss": 0.2365, "num_tokens": 1289727977.0, "step": 6758 }, { "epoch": 2.306878306878307, "grad_norm": 0.21139768696180078, "learning_rate": 1.2828780981284774e-05, "loss": 0.2422, "num_tokens": 1289978230.0, "step": 6759 }, { "epoch": 2.3072196620583716, "grad_norm": 0.24940440455078447, "learning_rate": 1.2822458270106221e-05, "loss": 0.2417, "num_tokens": 1290169799.0, "step": 6760 }, { "epoch": 2.3075610172384367, "grad_norm": 0.2418039599506848, "learning_rate": 1.281613555892767e-05, "loss": 0.2447, "num_tokens": 1290368200.0, "step": 6761 }, { "epoch": 2.3079023724185013, "grad_norm": 0.23665409365679352, "learning_rate": 1.2809812847749114e-05, "loss": 0.2214, "num_tokens": 1290535505.0, "step": 6762 }, { "epoch": 2.3082437275985663, "grad_norm": 0.24594182392701092, "learning_rate": 1.2803490136570562e-05, "loss": 0.2174, "num_tokens": 1290706470.0, "step": 6763 }, { "epoch": 2.308585082778631, "grad_norm": 0.22109551313695697, "learning_rate": 1.279716742539201e-05, "loss": 0.2203, "num_tokens": 1290911940.0, "step": 6764 }, { "epoch": 2.308926437958696, "grad_norm": 0.2038687163671181, "learning_rate": 1.2790844714213455e-05, "loss": 0.2213, "num_tokens": 1291138714.0, "step": 6765 }, { "epoch": 2.309267793138761, "grad_norm": 0.2399384950330654, "learning_rate": 1.2784522003034902e-05, "loss": 0.2466, "num_tokens": 1291349130.0, "step": 6766 }, { "epoch": 2.3096091483188257, "grad_norm": 0.2423647658823833, "learning_rate": 1.2778199291856349e-05, "loss": 0.2434, "num_tokens": 1291555657.0, "step": 6767 }, { "epoch": 2.3099505034988908, "grad_norm": 0.37522336843632786, "learning_rate": 1.2771876580677794e-05, "loss": 0.2164, "num_tokens": 1291725470.0, "step": 6768 }, { "epoch": 2.3102918586789554, "grad_norm": 0.26295503445895607, "learning_rate": 1.2765553869499241e-05, "loss": 0.1989, "num_tokens": 1291887453.0, "step": 6769 }, { "epoch": 2.3106332138590204, "grad_norm": 0.23668369596311573, "learning_rate": 1.275923115832069e-05, "loss": 0.2208, "num_tokens": 1292050832.0, "step": 6770 }, { "epoch": 2.310974569039085, "grad_norm": 0.22973330981455237, "learning_rate": 1.2752908447142133e-05, "loss": 0.2269, "num_tokens": 1292268833.0, "step": 6771 }, { "epoch": 2.31131592421915, "grad_norm": 0.25045921332663373, "learning_rate": 1.2746585735963582e-05, "loss": 0.2294, "num_tokens": 1292451007.0, "step": 6772 }, { "epoch": 2.3116572793992147, "grad_norm": 0.25933316223155256, "learning_rate": 1.2740263024785029e-05, "loss": 0.2254, "num_tokens": 1292607528.0, "step": 6773 }, { "epoch": 2.31199863457928, "grad_norm": 0.2408730608888197, "learning_rate": 1.2733940313606474e-05, "loss": 0.2306, "num_tokens": 1292811844.0, "step": 6774 }, { "epoch": 2.3123399897593444, "grad_norm": 0.2341201455129288, "learning_rate": 1.2727617602427921e-05, "loss": 0.247, "num_tokens": 1293012289.0, "step": 6775 }, { "epoch": 2.3126813449394095, "grad_norm": 0.23829794445870695, "learning_rate": 1.272129489124937e-05, "loss": 0.2197, "num_tokens": 1293193977.0, "step": 6776 }, { "epoch": 2.3130227001194745, "grad_norm": 0.2509084287127782, "learning_rate": 1.2714972180070813e-05, "loss": 0.2267, "num_tokens": 1293357051.0, "step": 6777 }, { "epoch": 2.313364055299539, "grad_norm": 0.24764257055326935, "learning_rate": 1.2708649468892262e-05, "loss": 0.2345, "num_tokens": 1293547967.0, "step": 6778 }, { "epoch": 2.313705410479604, "grad_norm": 0.25666395195701547, "learning_rate": 1.2702326757713709e-05, "loss": 0.2438, "num_tokens": 1293737975.0, "step": 6779 }, { "epoch": 2.314046765659669, "grad_norm": 0.23071728378740708, "learning_rate": 1.2696004046535154e-05, "loss": 0.2147, "num_tokens": 1293917340.0, "step": 6780 }, { "epoch": 2.314388120839734, "grad_norm": 0.22334428491891026, "learning_rate": 1.2689681335356601e-05, "loss": 0.213, "num_tokens": 1294127979.0, "step": 6781 }, { "epoch": 2.3147294760197985, "grad_norm": 0.22608714601020835, "learning_rate": 1.2683358624178048e-05, "loss": 0.2302, "num_tokens": 1294329847.0, "step": 6782 }, { "epoch": 2.3150708311998636, "grad_norm": 0.1982657546755997, "learning_rate": 1.2677035912999494e-05, "loss": 0.2175, "num_tokens": 1294559362.0, "step": 6783 }, { "epoch": 2.315412186379928, "grad_norm": 0.2479297574550595, "learning_rate": 1.267071320182094e-05, "loss": 0.2206, "num_tokens": 1294731056.0, "step": 6784 }, { "epoch": 2.3157535415599932, "grad_norm": 0.2699663477197648, "learning_rate": 1.266439049064239e-05, "loss": 0.2368, "num_tokens": 1294896601.0, "step": 6785 }, { "epoch": 2.316094896740058, "grad_norm": 0.23091742956554767, "learning_rate": 1.2658067779463833e-05, "loss": 0.2099, "num_tokens": 1295081294.0, "step": 6786 }, { "epoch": 2.316436251920123, "grad_norm": 0.24175720450765448, "learning_rate": 1.2651745068285281e-05, "loss": 0.1979, "num_tokens": 1295247010.0, "step": 6787 }, { "epoch": 2.316777607100188, "grad_norm": 0.22189300880512178, "learning_rate": 1.2645422357106728e-05, "loss": 0.2256, "num_tokens": 1295488588.0, "step": 6788 }, { "epoch": 2.3171189622802526, "grad_norm": 0.23435391225149496, "learning_rate": 1.2639099645928174e-05, "loss": 0.2132, "num_tokens": 1295679110.0, "step": 6789 }, { "epoch": 2.317460317460317, "grad_norm": 0.23044795672229798, "learning_rate": 1.263277693474962e-05, "loss": 0.2186, "num_tokens": 1295854658.0, "step": 6790 }, { "epoch": 2.3178016726403823, "grad_norm": 0.23858963181296589, "learning_rate": 1.262645422357107e-05, "loss": 0.2122, "num_tokens": 1296017000.0, "step": 6791 }, { "epoch": 2.3181430278204473, "grad_norm": 0.3001321613667481, "learning_rate": 1.2620131512392513e-05, "loss": 0.2424, "num_tokens": 1296236761.0, "step": 6792 }, { "epoch": 2.318484383000512, "grad_norm": 0.21813053510036834, "learning_rate": 1.2613808801213962e-05, "loss": 0.2109, "num_tokens": 1296415478.0, "step": 6793 }, { "epoch": 2.318825738180577, "grad_norm": 0.26440406471279554, "learning_rate": 1.2607486090035409e-05, "loss": 0.2298, "num_tokens": 1296587069.0, "step": 6794 }, { "epoch": 2.3191670933606416, "grad_norm": 0.24958544507507824, "learning_rate": 1.2601163378856854e-05, "loss": 0.2423, "num_tokens": 1296754208.0, "step": 6795 }, { "epoch": 2.3195084485407067, "grad_norm": 0.2441152242174737, "learning_rate": 1.2594840667678301e-05, "loss": 0.2323, "num_tokens": 1296912689.0, "step": 6796 }, { "epoch": 2.3198498037207713, "grad_norm": 0.21924306496119353, "learning_rate": 1.2588517956499748e-05, "loss": 0.2305, "num_tokens": 1297118668.0, "step": 6797 }, { "epoch": 2.3201911589008364, "grad_norm": 0.22414676873672726, "learning_rate": 1.2582195245321193e-05, "loss": 0.2526, "num_tokens": 1297345424.0, "step": 6798 }, { "epoch": 2.320532514080901, "grad_norm": 0.2482232623755034, "learning_rate": 1.257587253414264e-05, "loss": 0.2153, "num_tokens": 1297506630.0, "step": 6799 }, { "epoch": 2.320873869260966, "grad_norm": 0.2344141967819955, "learning_rate": 1.2569549822964089e-05, "loss": 0.2344, "num_tokens": 1297707484.0, "step": 6800 }, { "epoch": 2.3212152244410307, "grad_norm": 0.22108154303664065, "learning_rate": 1.2563227111785532e-05, "loss": 0.2408, "num_tokens": 1297939890.0, "step": 6801 }, { "epoch": 2.3215565796210957, "grad_norm": 0.26056048387977276, "learning_rate": 1.2556904400606981e-05, "loss": 0.2016, "num_tokens": 1298087357.0, "step": 6802 }, { "epoch": 2.321897934801161, "grad_norm": 0.2502168773521216, "learning_rate": 1.2550581689428428e-05, "loss": 0.2255, "num_tokens": 1298258662.0, "step": 6803 }, { "epoch": 2.3222392899812254, "grad_norm": 0.24901993678317852, "learning_rate": 1.2544258978249873e-05, "loss": 0.2397, "num_tokens": 1298425911.0, "step": 6804 }, { "epoch": 2.3225806451612905, "grad_norm": 0.23999932566857549, "learning_rate": 1.253793626707132e-05, "loss": 0.2368, "num_tokens": 1298639901.0, "step": 6805 }, { "epoch": 2.322922000341355, "grad_norm": 0.22617922375133967, "learning_rate": 1.2531613555892769e-05, "loss": 0.2209, "num_tokens": 1298834305.0, "step": 6806 }, { "epoch": 2.32326335552142, "grad_norm": 0.22296072166017117, "learning_rate": 1.2525290844714213e-05, "loss": 0.2348, "num_tokens": 1299044521.0, "step": 6807 }, { "epoch": 2.3236047107014848, "grad_norm": 0.2564534341989548, "learning_rate": 1.2518968133535661e-05, "loss": 0.2439, "num_tokens": 1299243583.0, "step": 6808 }, { "epoch": 2.32394606588155, "grad_norm": 0.250961478131239, "learning_rate": 1.2512645422357108e-05, "loss": 0.2222, "num_tokens": 1299422521.0, "step": 6809 }, { "epoch": 2.3242874210616145, "grad_norm": 0.2585793784557497, "learning_rate": 1.2506322711178554e-05, "loss": 0.2292, "num_tokens": 1299583180.0, "step": 6810 }, { "epoch": 2.3246287762416795, "grad_norm": 0.2580072720275482, "learning_rate": 1.25e-05, "loss": 0.2393, "num_tokens": 1299755178.0, "step": 6811 }, { "epoch": 2.324970131421744, "grad_norm": 0.27078005091397267, "learning_rate": 1.2493677288821448e-05, "loss": 0.2362, "num_tokens": 1299929527.0, "step": 6812 }, { "epoch": 2.325311486601809, "grad_norm": 0.2005525642897929, "learning_rate": 1.2487354577642895e-05, "loss": 0.2233, "num_tokens": 1300157316.0, "step": 6813 }, { "epoch": 2.3256528417818743, "grad_norm": 0.23727320396171506, "learning_rate": 1.248103186646434e-05, "loss": 0.2319, "num_tokens": 1300356062.0, "step": 6814 }, { "epoch": 2.325994196961939, "grad_norm": 0.24595321351935698, "learning_rate": 1.2474709155285787e-05, "loss": 0.2295, "num_tokens": 1300540422.0, "step": 6815 }, { "epoch": 2.326335552142004, "grad_norm": 0.23455579038500982, "learning_rate": 1.2468386444107234e-05, "loss": 0.2518, "num_tokens": 1300773983.0, "step": 6816 }, { "epoch": 2.3266769073220686, "grad_norm": 0.24671881488689046, "learning_rate": 1.246206373292868e-05, "loss": 0.2414, "num_tokens": 1300967805.0, "step": 6817 }, { "epoch": 2.3270182625021336, "grad_norm": 0.26584329309289756, "learning_rate": 1.2455741021750126e-05, "loss": 0.2467, "num_tokens": 1301136082.0, "step": 6818 }, { "epoch": 2.3273596176821982, "grad_norm": 0.2392382529875849, "learning_rate": 1.2449418310571575e-05, "loss": 0.2279, "num_tokens": 1301340705.0, "step": 6819 }, { "epoch": 2.3277009728622633, "grad_norm": 0.219574654022943, "learning_rate": 1.244309559939302e-05, "loss": 0.2492, "num_tokens": 1301561086.0, "step": 6820 }, { "epoch": 2.328042328042328, "grad_norm": 0.22614020304046303, "learning_rate": 1.2436772888214467e-05, "loss": 0.2387, "num_tokens": 1301753425.0, "step": 6821 }, { "epoch": 2.328383683222393, "grad_norm": 0.26696999673948874, "learning_rate": 1.2430450177035914e-05, "loss": 0.2232, "num_tokens": 1301905934.0, "step": 6822 }, { "epoch": 2.3287250384024576, "grad_norm": 0.24871621680804895, "learning_rate": 1.2424127465857361e-05, "loss": 0.221, "num_tokens": 1302091119.0, "step": 6823 }, { "epoch": 2.3290663935825227, "grad_norm": 0.24621642171500435, "learning_rate": 1.2417804754678806e-05, "loss": 0.2145, "num_tokens": 1302276706.0, "step": 6824 }, { "epoch": 2.3294077487625877, "grad_norm": 0.2470994623745924, "learning_rate": 1.2411482043500253e-05, "loss": 0.2289, "num_tokens": 1302449207.0, "step": 6825 }, { "epoch": 2.3297491039426523, "grad_norm": 0.25503310462386813, "learning_rate": 1.24051593323217e-05, "loss": 0.2154, "num_tokens": 1302589993.0, "step": 6826 }, { "epoch": 2.330090459122717, "grad_norm": 0.2513458402554772, "learning_rate": 1.2398836621143147e-05, "loss": 0.2439, "num_tokens": 1302780331.0, "step": 6827 }, { "epoch": 2.330431814302782, "grad_norm": 0.23554079578763948, "learning_rate": 1.2392513909964594e-05, "loss": 0.2211, "num_tokens": 1302977340.0, "step": 6828 }, { "epoch": 2.330773169482847, "grad_norm": 0.2542324744298144, "learning_rate": 1.238619119878604e-05, "loss": 0.2265, "num_tokens": 1303157952.0, "step": 6829 }, { "epoch": 2.3311145246629117, "grad_norm": 0.23079614019806982, "learning_rate": 1.2379868487607487e-05, "loss": 0.2113, "num_tokens": 1303337825.0, "step": 6830 }, { "epoch": 2.3314558798429768, "grad_norm": 0.23400296937824155, "learning_rate": 1.2373545776428933e-05, "loss": 0.2366, "num_tokens": 1303522453.0, "step": 6831 }, { "epoch": 2.3317972350230414, "grad_norm": 0.2528103277045711, "learning_rate": 1.236722306525038e-05, "loss": 0.2417, "num_tokens": 1303698679.0, "step": 6832 }, { "epoch": 2.3321385902031064, "grad_norm": 0.22176294659534496, "learning_rate": 1.2360900354071826e-05, "loss": 0.2453, "num_tokens": 1303914159.0, "step": 6833 }, { "epoch": 2.332479945383171, "grad_norm": 0.24657941797540897, "learning_rate": 1.2354577642893274e-05, "loss": 0.223, "num_tokens": 1304090162.0, "step": 6834 }, { "epoch": 2.332821300563236, "grad_norm": 0.2527064373504874, "learning_rate": 1.234825493171472e-05, "loss": 0.2481, "num_tokens": 1304280152.0, "step": 6835 }, { "epoch": 2.3331626557433007, "grad_norm": 0.23427765719980398, "learning_rate": 1.2341932220536167e-05, "loss": 0.227, "num_tokens": 1304483798.0, "step": 6836 }, { "epoch": 2.333504010923366, "grad_norm": 0.24948055612267958, "learning_rate": 1.2335609509357614e-05, "loss": 0.2104, "num_tokens": 1304678476.0, "step": 6837 }, { "epoch": 2.3338453661034304, "grad_norm": 0.26792833382901576, "learning_rate": 1.232928679817906e-05, "loss": 0.2091, "num_tokens": 1304848842.0, "step": 6838 }, { "epoch": 2.3341867212834955, "grad_norm": 0.23717159649942515, "learning_rate": 1.2322964087000506e-05, "loss": 0.2569, "num_tokens": 1305071284.0, "step": 6839 }, { "epoch": 2.3345280764635605, "grad_norm": 0.24187748405406115, "learning_rate": 1.2316641375821953e-05, "loss": 0.2213, "num_tokens": 1305257294.0, "step": 6840 }, { "epoch": 2.334869431643625, "grad_norm": 0.23594777430130823, "learning_rate": 1.23103186646434e-05, "loss": 0.2383, "num_tokens": 1305459109.0, "step": 6841 }, { "epoch": 2.33521078682369, "grad_norm": 0.2296105457679537, "learning_rate": 1.2303995953464847e-05, "loss": 0.2155, "num_tokens": 1305630623.0, "step": 6842 }, { "epoch": 2.335552142003755, "grad_norm": 0.22168414951277735, "learning_rate": 1.2297673242286294e-05, "loss": 0.2059, "num_tokens": 1305806493.0, "step": 6843 }, { "epoch": 2.33589349718382, "grad_norm": 0.22015090339288654, "learning_rate": 1.229135053110774e-05, "loss": 0.2307, "num_tokens": 1305995107.0, "step": 6844 }, { "epoch": 2.3362348523638845, "grad_norm": 0.22747688100932018, "learning_rate": 1.2285027819929186e-05, "loss": 0.222, "num_tokens": 1306192013.0, "step": 6845 }, { "epoch": 2.3365762075439496, "grad_norm": 0.2773221811173618, "learning_rate": 1.2278705108750631e-05, "loss": 0.2307, "num_tokens": 1306350866.0, "step": 6846 }, { "epoch": 2.336917562724014, "grad_norm": 0.22449075219601541, "learning_rate": 1.227238239757208e-05, "loss": 0.2174, "num_tokens": 1306534506.0, "step": 6847 }, { "epoch": 2.3372589179040792, "grad_norm": 0.22513301684185136, "learning_rate": 1.2266059686393525e-05, "loss": 0.2281, "num_tokens": 1306744670.0, "step": 6848 }, { "epoch": 2.337600273084144, "grad_norm": 0.2354458401977868, "learning_rate": 1.2259736975214972e-05, "loss": 0.24, "num_tokens": 1306953969.0, "step": 6849 }, { "epoch": 2.337941628264209, "grad_norm": 0.24882180232304565, "learning_rate": 1.225341426403642e-05, "loss": 0.2411, "num_tokens": 1307160130.0, "step": 6850 }, { "epoch": 2.338282983444274, "grad_norm": 0.22383114206677826, "learning_rate": 1.2247091552857866e-05, "loss": 0.2277, "num_tokens": 1307344471.0, "step": 6851 }, { "epoch": 2.3386243386243386, "grad_norm": 0.27407437738083545, "learning_rate": 1.2240768841679312e-05, "loss": 0.2425, "num_tokens": 1307500598.0, "step": 6852 }, { "epoch": 2.3389656938044037, "grad_norm": 0.2296399613734741, "learning_rate": 1.223444613050076e-05, "loss": 0.2133, "num_tokens": 1307681689.0, "step": 6853 }, { "epoch": 2.3393070489844683, "grad_norm": 0.207182110946717, "learning_rate": 1.2228123419322206e-05, "loss": 0.2383, "num_tokens": 1307909266.0, "step": 6854 }, { "epoch": 2.3396484041645333, "grad_norm": 0.226755477699632, "learning_rate": 1.2221800708143653e-05, "loss": 0.2241, "num_tokens": 1308090484.0, "step": 6855 }, { "epoch": 2.339989759344598, "grad_norm": 0.24034434099386903, "learning_rate": 1.22154779969651e-05, "loss": 0.2219, "num_tokens": 1308273735.0, "step": 6856 }, { "epoch": 2.340331114524663, "grad_norm": 0.25088743148705445, "learning_rate": 1.2209155285786545e-05, "loss": 0.2386, "num_tokens": 1308434677.0, "step": 6857 }, { "epoch": 2.3406724697047276, "grad_norm": 0.24321477390205187, "learning_rate": 1.2202832574607992e-05, "loss": 0.2335, "num_tokens": 1308608056.0, "step": 6858 }, { "epoch": 2.3410138248847927, "grad_norm": 0.25726846851449564, "learning_rate": 1.2196509863429439e-05, "loss": 0.2299, "num_tokens": 1308775852.0, "step": 6859 }, { "epoch": 2.3413551800648573, "grad_norm": 0.24916798328798762, "learning_rate": 1.2190187152250886e-05, "loss": 0.2442, "num_tokens": 1308966824.0, "step": 6860 }, { "epoch": 2.3416965352449224, "grad_norm": 0.22748515178525963, "learning_rate": 1.2183864441072331e-05, "loss": 0.2077, "num_tokens": 1309143161.0, "step": 6861 }, { "epoch": 2.3420378904249874, "grad_norm": 0.2448829358100461, "learning_rate": 1.217754172989378e-05, "loss": 0.2296, "num_tokens": 1309319479.0, "step": 6862 }, { "epoch": 2.342379245605052, "grad_norm": 0.23297072479041772, "learning_rate": 1.2171219018715225e-05, "loss": 0.2391, "num_tokens": 1309530519.0, "step": 6863 }, { "epoch": 2.3427206007851167, "grad_norm": 0.2629835226481367, "learning_rate": 1.2164896307536672e-05, "loss": 0.2525, "num_tokens": 1309731852.0, "step": 6864 }, { "epoch": 2.3430619559651817, "grad_norm": 0.22375856122397447, "learning_rate": 1.2158573596358119e-05, "loss": 0.234, "num_tokens": 1309922416.0, "step": 6865 }, { "epoch": 2.343403311145247, "grad_norm": 0.2451135539114972, "learning_rate": 1.2152250885179566e-05, "loss": 0.2108, "num_tokens": 1310085973.0, "step": 6866 }, { "epoch": 2.3437446663253114, "grad_norm": 0.2432139702856457, "learning_rate": 1.2145928174001011e-05, "loss": 0.2217, "num_tokens": 1310263009.0, "step": 6867 }, { "epoch": 2.3440860215053765, "grad_norm": 0.25175007459930465, "learning_rate": 1.213960546282246e-05, "loss": 0.2491, "num_tokens": 1310458898.0, "step": 6868 }, { "epoch": 2.344427376685441, "grad_norm": 0.2577415054386387, "learning_rate": 1.2133282751643905e-05, "loss": 0.2432, "num_tokens": 1310646193.0, "step": 6869 }, { "epoch": 2.344768731865506, "grad_norm": 0.21785269977280863, "learning_rate": 1.2126960040465352e-05, "loss": 0.2357, "num_tokens": 1310856163.0, "step": 6870 }, { "epoch": 2.345110087045571, "grad_norm": 0.2507917617161752, "learning_rate": 1.21206373292868e-05, "loss": 0.228, "num_tokens": 1311039429.0, "step": 6871 }, { "epoch": 2.345451442225636, "grad_norm": 0.22839226286612224, "learning_rate": 1.2114314618108245e-05, "loss": 0.2048, "num_tokens": 1311213758.0, "step": 6872 }, { "epoch": 2.3457927974057005, "grad_norm": 0.248605091396381, "learning_rate": 1.2107991906929692e-05, "loss": 0.2123, "num_tokens": 1311384330.0, "step": 6873 }, { "epoch": 2.3461341525857655, "grad_norm": 0.25860782609187916, "learning_rate": 1.2101669195751139e-05, "loss": 0.197, "num_tokens": 1311532553.0, "step": 6874 }, { "epoch": 2.34647550776583, "grad_norm": 0.2578630426381319, "learning_rate": 1.2095346484572585e-05, "loss": 0.2177, "num_tokens": 1311691802.0, "step": 6875 }, { "epoch": 2.346816862945895, "grad_norm": 0.23502485129558917, "learning_rate": 1.208902377339403e-05, "loss": 0.2476, "num_tokens": 1311880988.0, "step": 6876 }, { "epoch": 2.3471582181259603, "grad_norm": 0.2496198082588777, "learning_rate": 1.208270106221548e-05, "loss": 0.239, "num_tokens": 1312065336.0, "step": 6877 }, { "epoch": 2.347499573306025, "grad_norm": 0.23607542009602955, "learning_rate": 1.2076378351036925e-05, "loss": 0.2098, "num_tokens": 1312260067.0, "step": 6878 }, { "epoch": 2.34784092848609, "grad_norm": 0.23791440729309268, "learning_rate": 1.2070055639858372e-05, "loss": 0.2198, "num_tokens": 1312456906.0, "step": 6879 }, { "epoch": 2.3481822836661546, "grad_norm": 0.2531753487047318, "learning_rate": 1.2063732928679819e-05, "loss": 0.2508, "num_tokens": 1312642545.0, "step": 6880 }, { "epoch": 2.3485236388462196, "grad_norm": 0.24009385154384627, "learning_rate": 1.2057410217501266e-05, "loss": 0.2362, "num_tokens": 1312834379.0, "step": 6881 }, { "epoch": 2.3488649940262842, "grad_norm": 0.2297375456685687, "learning_rate": 1.2051087506322711e-05, "loss": 0.1917, "num_tokens": 1312998741.0, "step": 6882 }, { "epoch": 2.3492063492063493, "grad_norm": 0.2251522073255988, "learning_rate": 1.204476479514416e-05, "loss": 0.2285, "num_tokens": 1313205876.0, "step": 6883 }, { "epoch": 2.349547704386414, "grad_norm": 0.267128280917412, "learning_rate": 1.2038442083965605e-05, "loss": 0.2161, "num_tokens": 1313354125.0, "step": 6884 }, { "epoch": 2.349889059566479, "grad_norm": 0.23316045883803768, "learning_rate": 1.2032119372787052e-05, "loss": 0.2235, "num_tokens": 1313526774.0, "step": 6885 }, { "epoch": 2.3502304147465436, "grad_norm": 0.26856479667472505, "learning_rate": 1.2025796661608499e-05, "loss": 0.2285, "num_tokens": 1313684204.0, "step": 6886 }, { "epoch": 2.3505717699266087, "grad_norm": 0.2358104010621094, "learning_rate": 1.2019473950429944e-05, "loss": 0.2101, "num_tokens": 1313854870.0, "step": 6887 }, { "epoch": 2.3509131251066737, "grad_norm": 0.23213173271360715, "learning_rate": 1.2013151239251391e-05, "loss": 0.2145, "num_tokens": 1314035011.0, "step": 6888 }, { "epoch": 2.3512544802867383, "grad_norm": 0.22177562285033775, "learning_rate": 1.2006828528072838e-05, "loss": 0.2329, "num_tokens": 1314254181.0, "step": 6889 }, { "epoch": 2.3515958354668034, "grad_norm": 0.21678243199581004, "learning_rate": 1.2000505816894285e-05, "loss": 0.2527, "num_tokens": 1314518371.0, "step": 6890 }, { "epoch": 2.351937190646868, "grad_norm": 0.23907670841943085, "learning_rate": 1.199418310571573e-05, "loss": 0.2211, "num_tokens": 1314697434.0, "step": 6891 }, { "epoch": 2.352278545826933, "grad_norm": 0.24376987376647957, "learning_rate": 1.1987860394537179e-05, "loss": 0.2079, "num_tokens": 1314859333.0, "step": 6892 }, { "epoch": 2.3526199010069977, "grad_norm": 0.21728346936343634, "learning_rate": 1.1981537683358624e-05, "loss": 0.2211, "num_tokens": 1315066154.0, "step": 6893 }, { "epoch": 2.3529612561870628, "grad_norm": 0.21621402456966884, "learning_rate": 1.1975214972180071e-05, "loss": 0.2239, "num_tokens": 1315271758.0, "step": 6894 }, { "epoch": 2.3533026113671274, "grad_norm": 0.2595764788486116, "learning_rate": 1.1968892261001518e-05, "loss": 0.2217, "num_tokens": 1315447089.0, "step": 6895 }, { "epoch": 2.3536439665471924, "grad_norm": 0.22022798283517916, "learning_rate": 1.1962569549822965e-05, "loss": 0.2397, "num_tokens": 1315669586.0, "step": 6896 }, { "epoch": 2.353985321727257, "grad_norm": 0.22441552340795606, "learning_rate": 1.195624683864441e-05, "loss": 0.2257, "num_tokens": 1315853710.0, "step": 6897 }, { "epoch": 2.354326676907322, "grad_norm": 0.24521614199907352, "learning_rate": 1.194992412746586e-05, "loss": 0.2413, "num_tokens": 1316019857.0, "step": 6898 }, { "epoch": 2.354668032087387, "grad_norm": 0.22927571217097456, "learning_rate": 1.1943601416287305e-05, "loss": 0.2204, "num_tokens": 1316209976.0, "step": 6899 }, { "epoch": 2.355009387267452, "grad_norm": 0.26105514320225115, "learning_rate": 1.1937278705108752e-05, "loss": 0.2378, "num_tokens": 1316394667.0, "step": 6900 }, { "epoch": 2.3553507424475164, "grad_norm": 0.24462868607012003, "learning_rate": 1.1930955993930199e-05, "loss": 0.2462, "num_tokens": 1316590806.0, "step": 6901 }, { "epoch": 2.3556920976275815, "grad_norm": 0.24735695703275534, "learning_rate": 1.1924633282751644e-05, "loss": 0.2175, "num_tokens": 1316758981.0, "step": 6902 }, { "epoch": 2.3560334528076465, "grad_norm": 0.2476415149165088, "learning_rate": 1.1918310571573091e-05, "loss": 0.2049, "num_tokens": 1316908064.0, "step": 6903 }, { "epoch": 2.356374807987711, "grad_norm": 0.24564281192464177, "learning_rate": 1.1911987860394538e-05, "loss": 0.2096, "num_tokens": 1317085307.0, "step": 6904 }, { "epoch": 2.356716163167776, "grad_norm": 0.2517281193014638, "learning_rate": 1.1905665149215985e-05, "loss": 0.2166, "num_tokens": 1317250990.0, "step": 6905 }, { "epoch": 2.357057518347841, "grad_norm": 0.2309717312208158, "learning_rate": 1.189934243803743e-05, "loss": 0.2154, "num_tokens": 1317434797.0, "step": 6906 }, { "epoch": 2.357398873527906, "grad_norm": 0.2448067992746298, "learning_rate": 1.1893019726858879e-05, "loss": 0.2079, "num_tokens": 1317599590.0, "step": 6907 }, { "epoch": 2.3577402287079705, "grad_norm": 0.2628228591147404, "learning_rate": 1.1886697015680324e-05, "loss": 0.2378, "num_tokens": 1317787183.0, "step": 6908 }, { "epoch": 2.3580815838880356, "grad_norm": 0.2319171743343542, "learning_rate": 1.1880374304501771e-05, "loss": 0.2221, "num_tokens": 1317997044.0, "step": 6909 }, { "epoch": 2.3584229390681, "grad_norm": 0.2452081745757077, "learning_rate": 1.1874051593323216e-05, "loss": 0.2083, "num_tokens": 1318200983.0, "step": 6910 }, { "epoch": 2.3587642942481652, "grad_norm": 0.22746216469203562, "learning_rate": 1.1867728882144665e-05, "loss": 0.2275, "num_tokens": 1318393634.0, "step": 6911 }, { "epoch": 2.35910564942823, "grad_norm": 0.25071580779392083, "learning_rate": 1.186140617096611e-05, "loss": 0.2399, "num_tokens": 1318599162.0, "step": 6912 }, { "epoch": 2.359447004608295, "grad_norm": 0.26057555128655974, "learning_rate": 1.1855083459787557e-05, "loss": 0.2397, "num_tokens": 1318796856.0, "step": 6913 }, { "epoch": 2.35978835978836, "grad_norm": 0.25446278555396445, "learning_rate": 1.1848760748609004e-05, "loss": 0.2257, "num_tokens": 1318974196.0, "step": 6914 }, { "epoch": 2.3601297149684246, "grad_norm": 0.2410421898943618, "learning_rate": 1.1842438037430451e-05, "loss": 0.2278, "num_tokens": 1319139264.0, "step": 6915 }, { "epoch": 2.3604710701484897, "grad_norm": 0.23346070334185218, "learning_rate": 1.1836115326251897e-05, "loss": 0.2199, "num_tokens": 1319323434.0, "step": 6916 }, { "epoch": 2.3608124253285543, "grad_norm": 0.24856416955724386, "learning_rate": 1.1829792615073344e-05, "loss": 0.2164, "num_tokens": 1319512841.0, "step": 6917 }, { "epoch": 2.3611537805086193, "grad_norm": 0.23528320542568887, "learning_rate": 1.182346990389479e-05, "loss": 0.2129, "num_tokens": 1319701729.0, "step": 6918 }, { "epoch": 2.361495135688684, "grad_norm": 0.2340462630111493, "learning_rate": 1.1817147192716237e-05, "loss": 0.2192, "num_tokens": 1319888624.0, "step": 6919 }, { "epoch": 2.361836490868749, "grad_norm": 0.22643221909875447, "learning_rate": 1.1810824481537684e-05, "loss": 0.2393, "num_tokens": 1320127742.0, "step": 6920 }, { "epoch": 2.3621778460488136, "grad_norm": 0.21973320848803868, "learning_rate": 1.180450177035913e-05, "loss": 0.2096, "num_tokens": 1320319533.0, "step": 6921 }, { "epoch": 2.3625192012288787, "grad_norm": 0.2244485289928381, "learning_rate": 1.1798179059180577e-05, "loss": 0.2065, "num_tokens": 1320490125.0, "step": 6922 }, { "epoch": 2.3628605564089433, "grad_norm": 0.22648813995211484, "learning_rate": 1.1791856348002024e-05, "loss": 0.2407, "num_tokens": 1320706214.0, "step": 6923 }, { "epoch": 2.3632019115890084, "grad_norm": 0.24897334927429018, "learning_rate": 1.178553363682347e-05, "loss": 0.215, "num_tokens": 1320873511.0, "step": 6924 }, { "epoch": 2.3635432667690734, "grad_norm": 0.24648068360007513, "learning_rate": 1.1779210925644916e-05, "loss": 0.2105, "num_tokens": 1321032755.0, "step": 6925 }, { "epoch": 2.363884621949138, "grad_norm": 0.25103009746169924, "learning_rate": 1.1772888214466365e-05, "loss": 0.2085, "num_tokens": 1321182484.0, "step": 6926 }, { "epoch": 2.364225977129203, "grad_norm": 0.2618491673693975, "learning_rate": 1.176656550328781e-05, "loss": 0.2298, "num_tokens": 1321343660.0, "step": 6927 }, { "epoch": 2.3645673323092677, "grad_norm": 0.8487475310219362, "learning_rate": 1.1760242792109257e-05, "loss": 0.2635, "num_tokens": 1321567538.0, "step": 6928 }, { "epoch": 2.364908687489333, "grad_norm": 0.26105916553364544, "learning_rate": 1.1753920080930704e-05, "loss": 0.2534, "num_tokens": 1321767310.0, "step": 6929 }, { "epoch": 2.3652500426693974, "grad_norm": 0.24206492889476117, "learning_rate": 1.1747597369752151e-05, "loss": 0.2044, "num_tokens": 1321947018.0, "step": 6930 }, { "epoch": 2.3655913978494625, "grad_norm": 0.22546063499007626, "learning_rate": 1.1741274658573596e-05, "loss": 0.2349, "num_tokens": 1322157956.0, "step": 6931 }, { "epoch": 2.365932753029527, "grad_norm": 0.24590472226484764, "learning_rate": 1.1734951947395043e-05, "loss": 0.2477, "num_tokens": 1322355445.0, "step": 6932 }, { "epoch": 2.366274108209592, "grad_norm": 0.25655657581273406, "learning_rate": 1.172862923621649e-05, "loss": 0.231, "num_tokens": 1322534120.0, "step": 6933 }, { "epoch": 2.366615463389657, "grad_norm": 0.25287988050989413, "learning_rate": 1.1722306525037935e-05, "loss": 0.2199, "num_tokens": 1322706221.0, "step": 6934 }, { "epoch": 2.366956818569722, "grad_norm": 0.2107284138709364, "learning_rate": 1.1715983813859384e-05, "loss": 0.2284, "num_tokens": 1322956394.0, "step": 6935 }, { "epoch": 2.3672981737497865, "grad_norm": 0.23313874565848583, "learning_rate": 1.170966110268083e-05, "loss": 0.2512, "num_tokens": 1323187345.0, "step": 6936 }, { "epoch": 2.3676395289298515, "grad_norm": 0.21117441860621647, "learning_rate": 1.1703338391502276e-05, "loss": 0.2277, "num_tokens": 1323392435.0, "step": 6937 }, { "epoch": 2.367980884109916, "grad_norm": 0.26372824453108024, "learning_rate": 1.1697015680323723e-05, "loss": 0.22, "num_tokens": 1323545390.0, "step": 6938 }, { "epoch": 2.368322239289981, "grad_norm": 0.2199773192566652, "learning_rate": 1.169069296914517e-05, "loss": 0.2133, "num_tokens": 1323738521.0, "step": 6939 }, { "epoch": 2.3686635944700463, "grad_norm": 0.23389430670146144, "learning_rate": 1.1684370257966616e-05, "loss": 0.2112, "num_tokens": 1323915316.0, "step": 6940 }, { "epoch": 2.369004949650111, "grad_norm": 0.2484892720661754, "learning_rate": 1.1678047546788064e-05, "loss": 0.2146, "num_tokens": 1324076014.0, "step": 6941 }, { "epoch": 2.369346304830176, "grad_norm": 0.2249279513300143, "learning_rate": 1.167172483560951e-05, "loss": 0.228, "num_tokens": 1324265571.0, "step": 6942 }, { "epoch": 2.3696876600102406, "grad_norm": 0.26591697819392196, "learning_rate": 1.1665402124430957e-05, "loss": 0.2155, "num_tokens": 1324430720.0, "step": 6943 }, { "epoch": 2.3700290151903056, "grad_norm": 0.24019911868684007, "learning_rate": 1.1659079413252404e-05, "loss": 0.2069, "num_tokens": 1324616169.0, "step": 6944 }, { "epoch": 2.3703703703703702, "grad_norm": 0.23139976675588877, "learning_rate": 1.165275670207385e-05, "loss": 0.237, "num_tokens": 1324807385.0, "step": 6945 }, { "epoch": 2.3707117255504353, "grad_norm": 0.2505136540057084, "learning_rate": 1.1646433990895296e-05, "loss": 0.2299, "num_tokens": 1324975534.0, "step": 6946 }, { "epoch": 2.3710530807305, "grad_norm": 0.23156983786671456, "learning_rate": 1.1640111279716743e-05, "loss": 0.2712, "num_tokens": 1325211869.0, "step": 6947 }, { "epoch": 2.371394435910565, "grad_norm": 0.23399828158952787, "learning_rate": 1.163378856853819e-05, "loss": 0.2273, "num_tokens": 1325406131.0, "step": 6948 }, { "epoch": 2.3717357910906296, "grad_norm": 0.237565041565733, "learning_rate": 1.1627465857359635e-05, "loss": 0.2198, "num_tokens": 1325592809.0, "step": 6949 }, { "epoch": 2.3720771462706947, "grad_norm": 0.22292146282609146, "learning_rate": 1.1621143146181084e-05, "loss": 0.2099, "num_tokens": 1325788272.0, "step": 6950 }, { "epoch": 2.3724185014507597, "grad_norm": 0.26914318546586125, "learning_rate": 1.1614820435002529e-05, "loss": 0.2176, "num_tokens": 1325947945.0, "step": 6951 }, { "epoch": 2.3727598566308243, "grad_norm": 0.23139546923603713, "learning_rate": 1.1608497723823976e-05, "loss": 0.2369, "num_tokens": 1326160000.0, "step": 6952 }, { "epoch": 2.3731012118108894, "grad_norm": 0.2211208815122764, "learning_rate": 1.1602175012645423e-05, "loss": 0.234, "num_tokens": 1326364011.0, "step": 6953 }, { "epoch": 2.373442566990954, "grad_norm": 0.2270517152345773, "learning_rate": 1.159585230146687e-05, "loss": 0.2231, "num_tokens": 1326567866.0, "step": 6954 }, { "epoch": 2.373783922171019, "grad_norm": 0.24383693417051677, "learning_rate": 1.1589529590288315e-05, "loss": 0.2144, "num_tokens": 1326735743.0, "step": 6955 }, { "epoch": 2.3741252773510837, "grad_norm": 0.2272929838165886, "learning_rate": 1.1583206879109764e-05, "loss": 0.216, "num_tokens": 1326938324.0, "step": 6956 }, { "epoch": 2.3744666325311488, "grad_norm": 0.24924925722921892, "learning_rate": 1.157688416793121e-05, "loss": 0.236, "num_tokens": 1327115276.0, "step": 6957 }, { "epoch": 2.3748079877112134, "grad_norm": 0.28280782825626805, "learning_rate": 1.1570561456752656e-05, "loss": 0.2173, "num_tokens": 1327265168.0, "step": 6958 }, { "epoch": 2.3751493428912784, "grad_norm": 0.2349682544545979, "learning_rate": 1.1564238745574103e-05, "loss": 0.2201, "num_tokens": 1327455679.0, "step": 6959 }, { "epoch": 2.375490698071343, "grad_norm": 0.22357803683050215, "learning_rate": 1.155791603439555e-05, "loss": 0.2252, "num_tokens": 1327674321.0, "step": 6960 }, { "epoch": 2.375832053251408, "grad_norm": 0.24009555367630295, "learning_rate": 1.1551593323216996e-05, "loss": 0.2439, "num_tokens": 1327880192.0, "step": 6961 }, { "epoch": 2.376173408431473, "grad_norm": 0.25287414283978343, "learning_rate": 1.1545270612038443e-05, "loss": 0.2305, "num_tokens": 1328045777.0, "step": 6962 }, { "epoch": 2.376514763611538, "grad_norm": 0.23163803573726777, "learning_rate": 1.153894790085989e-05, "loss": 0.2384, "num_tokens": 1328236017.0, "step": 6963 }, { "epoch": 2.376856118791603, "grad_norm": 0.23688321234995713, "learning_rate": 1.1532625189681335e-05, "loss": 0.2301, "num_tokens": 1328422953.0, "step": 6964 }, { "epoch": 2.3771974739716675, "grad_norm": 0.20663085347685578, "learning_rate": 1.1526302478502783e-05, "loss": 0.2206, "num_tokens": 1328647303.0, "step": 6965 }, { "epoch": 2.3775388291517325, "grad_norm": 0.22868380021904072, "learning_rate": 1.1519979767324229e-05, "loss": 0.2029, "num_tokens": 1328825364.0, "step": 6966 }, { "epoch": 2.377880184331797, "grad_norm": 0.2583353935535827, "learning_rate": 1.1513657056145676e-05, "loss": 0.2092, "num_tokens": 1328985495.0, "step": 6967 }, { "epoch": 2.378221539511862, "grad_norm": 0.24495825486105274, "learning_rate": 1.1507334344967123e-05, "loss": 0.2077, "num_tokens": 1329164032.0, "step": 6968 }, { "epoch": 2.378562894691927, "grad_norm": 0.24682700509355582, "learning_rate": 1.150101163378857e-05, "loss": 0.228, "num_tokens": 1329354169.0, "step": 6969 }, { "epoch": 2.378904249871992, "grad_norm": 0.24234209991516278, "learning_rate": 1.1494688922610015e-05, "loss": 0.2145, "num_tokens": 1329520401.0, "step": 6970 }, { "epoch": 2.3792456050520565, "grad_norm": 0.23659873563534725, "learning_rate": 1.1488366211431464e-05, "loss": 0.2126, "num_tokens": 1329681196.0, "step": 6971 }, { "epoch": 2.3795869602321216, "grad_norm": 0.1998480717016953, "learning_rate": 1.1482043500252909e-05, "loss": 0.2275, "num_tokens": 1329918527.0, "step": 6972 }, { "epoch": 2.379928315412186, "grad_norm": 0.22227353505934924, "learning_rate": 1.1475720789074356e-05, "loss": 0.2235, "num_tokens": 1330118300.0, "step": 6973 }, { "epoch": 2.3802696705922513, "grad_norm": 0.22456349329082928, "learning_rate": 1.1469398077895803e-05, "loss": 0.2401, "num_tokens": 1330341154.0, "step": 6974 }, { "epoch": 2.380611025772316, "grad_norm": 0.22361393060458323, "learning_rate": 1.146307536671725e-05, "loss": 0.2312, "num_tokens": 1330548874.0, "step": 6975 }, { "epoch": 2.380952380952381, "grad_norm": 0.2317928040723866, "learning_rate": 1.1456752655538695e-05, "loss": 0.234, "num_tokens": 1330761366.0, "step": 6976 }, { "epoch": 2.381293736132446, "grad_norm": 0.2623731754075152, "learning_rate": 1.1450429944360142e-05, "loss": 0.2158, "num_tokens": 1330917764.0, "step": 6977 }, { "epoch": 2.3816350913125106, "grad_norm": 0.24268973254824805, "learning_rate": 1.144410723318159e-05, "loss": 0.2278, "num_tokens": 1331086735.0, "step": 6978 }, { "epoch": 2.3819764464925757, "grad_norm": 0.2280591149597647, "learning_rate": 1.1437784522003034e-05, "loss": 0.2133, "num_tokens": 1331282118.0, "step": 6979 }, { "epoch": 2.3823178016726403, "grad_norm": 0.23274468131776552, "learning_rate": 1.1431461810824481e-05, "loss": 0.2194, "num_tokens": 1331455241.0, "step": 6980 }, { "epoch": 2.3826591568527054, "grad_norm": 0.24510690393077092, "learning_rate": 1.1425139099645928e-05, "loss": 0.2507, "num_tokens": 1331657517.0, "step": 6981 }, { "epoch": 2.38300051203277, "grad_norm": 0.2638744554096112, "learning_rate": 1.1418816388467375e-05, "loss": 0.213, "num_tokens": 1331808595.0, "step": 6982 }, { "epoch": 2.383341867212835, "grad_norm": 0.21543376782076715, "learning_rate": 1.141249367728882e-05, "loss": 0.2268, "num_tokens": 1332031546.0, "step": 6983 }, { "epoch": 2.3836832223928996, "grad_norm": 0.23718152365728817, "learning_rate": 1.140617096611027e-05, "loss": 0.2197, "num_tokens": 1332243729.0, "step": 6984 }, { "epoch": 2.3840245775729647, "grad_norm": 0.25586089460250394, "learning_rate": 1.1399848254931715e-05, "loss": 0.2215, "num_tokens": 1332410200.0, "step": 6985 }, { "epoch": 2.3843659327530293, "grad_norm": 0.2550140497432372, "learning_rate": 1.1393525543753162e-05, "loss": 0.195, "num_tokens": 1332559829.0, "step": 6986 }, { "epoch": 2.3847072879330944, "grad_norm": 0.25596276465337736, "learning_rate": 1.1387202832574609e-05, "loss": 0.2383, "num_tokens": 1332742926.0, "step": 6987 }, { "epoch": 2.3850486431131595, "grad_norm": 0.2594941036889668, "learning_rate": 1.1380880121396056e-05, "loss": 0.2121, "num_tokens": 1332901604.0, "step": 6988 }, { "epoch": 2.385389998293224, "grad_norm": 0.23898388667171583, "learning_rate": 1.1374557410217501e-05, "loss": 0.2473, "num_tokens": 1333094603.0, "step": 6989 }, { "epoch": 2.385731353473289, "grad_norm": 0.2688834429346641, "learning_rate": 1.136823469903895e-05, "loss": 0.2216, "num_tokens": 1333253324.0, "step": 6990 }, { "epoch": 2.3860727086533537, "grad_norm": 0.250886775141333, "learning_rate": 1.1361911987860395e-05, "loss": 0.2435, "num_tokens": 1333428527.0, "step": 6991 }, { "epoch": 2.386414063833419, "grad_norm": 0.2279885524642493, "learning_rate": 1.1355589276681842e-05, "loss": 0.2423, "num_tokens": 1333657627.0, "step": 6992 }, { "epoch": 2.3867554190134834, "grad_norm": 0.2545171404860166, "learning_rate": 1.1349266565503289e-05, "loss": 0.2334, "num_tokens": 1333838189.0, "step": 6993 }, { "epoch": 2.3870967741935485, "grad_norm": 0.22446430414323934, "learning_rate": 1.1342943854324734e-05, "loss": 0.2234, "num_tokens": 1334043813.0, "step": 6994 }, { "epoch": 2.387438129373613, "grad_norm": 0.20531558717302772, "learning_rate": 1.1336621143146181e-05, "loss": 0.223, "num_tokens": 1334262064.0, "step": 6995 }, { "epoch": 2.387779484553678, "grad_norm": 0.24042677206945626, "learning_rate": 1.1330298431967628e-05, "loss": 0.2375, "num_tokens": 1334466912.0, "step": 6996 }, { "epoch": 2.388120839733743, "grad_norm": 0.25091544575139485, "learning_rate": 1.1323975720789075e-05, "loss": 0.2202, "num_tokens": 1334629960.0, "step": 6997 }, { "epoch": 2.388462194913808, "grad_norm": 0.24047480110979857, "learning_rate": 1.131765300961052e-05, "loss": 0.2339, "num_tokens": 1334831391.0, "step": 6998 }, { "epoch": 2.388803550093873, "grad_norm": 0.24445727470511275, "learning_rate": 1.1311330298431969e-05, "loss": 0.2193, "num_tokens": 1335010192.0, "step": 6999 }, { "epoch": 2.3891449052739375, "grad_norm": 0.21944755636692928, "learning_rate": 1.1305007587253414e-05, "loss": 0.258, "num_tokens": 1335232085.0, "step": 7000 }, { "epoch": 2.389486260454002, "grad_norm": 0.2630737404166038, "learning_rate": 1.1298684876074861e-05, "loss": 0.2434, "num_tokens": 1335406601.0, "step": 7001 }, { "epoch": 2.389827615634067, "grad_norm": 0.2330415899525152, "learning_rate": 1.1292362164896308e-05, "loss": 0.2237, "num_tokens": 1335596060.0, "step": 7002 }, { "epoch": 2.3901689708141323, "grad_norm": 0.3020643185180269, "learning_rate": 1.1286039453717755e-05, "loss": 0.2319, "num_tokens": 1335809323.0, "step": 7003 }, { "epoch": 2.390510325994197, "grad_norm": 0.2574227244029974, "learning_rate": 1.12797167425392e-05, "loss": 0.2329, "num_tokens": 1336015530.0, "step": 7004 }, { "epoch": 2.390851681174262, "grad_norm": 0.21099451535363078, "learning_rate": 1.127339403136065e-05, "loss": 0.2208, "num_tokens": 1336209107.0, "step": 7005 }, { "epoch": 2.3911930363543266, "grad_norm": 0.23206028099204815, "learning_rate": 1.1267071320182095e-05, "loss": 0.2182, "num_tokens": 1336395199.0, "step": 7006 }, { "epoch": 2.3915343915343916, "grad_norm": 0.22905115056750722, "learning_rate": 1.1260748609003542e-05, "loss": 0.2263, "num_tokens": 1336591645.0, "step": 7007 }, { "epoch": 2.3918757467144562, "grad_norm": 0.24883003208681514, "learning_rate": 1.1254425897824988e-05, "loss": 0.2033, "num_tokens": 1336756996.0, "step": 7008 }, { "epoch": 2.3922171018945213, "grad_norm": 0.26879586999001004, "learning_rate": 1.1248103186646434e-05, "loss": 0.2405, "num_tokens": 1336943658.0, "step": 7009 }, { "epoch": 2.392558457074586, "grad_norm": 0.2116256436532634, "learning_rate": 1.124178047546788e-05, "loss": 0.23, "num_tokens": 1337162950.0, "step": 7010 }, { "epoch": 2.392899812254651, "grad_norm": 0.2470063581829595, "learning_rate": 1.1235457764289328e-05, "loss": 0.2189, "num_tokens": 1337333645.0, "step": 7011 }, { "epoch": 2.3932411674347156, "grad_norm": 0.23608847457971044, "learning_rate": 1.1229135053110775e-05, "loss": 0.2343, "num_tokens": 1337513162.0, "step": 7012 }, { "epoch": 2.3935825226147807, "grad_norm": 0.2495383001095515, "learning_rate": 1.122281234193222e-05, "loss": 0.2572, "num_tokens": 1337713031.0, "step": 7013 }, { "epoch": 2.3939238777948457, "grad_norm": 0.23492650104587698, "learning_rate": 1.1216489630753669e-05, "loss": 0.2078, "num_tokens": 1337880227.0, "step": 7014 }, { "epoch": 2.3942652329749103, "grad_norm": 0.2269413657045848, "learning_rate": 1.1210166919575114e-05, "loss": 0.2166, "num_tokens": 1338079686.0, "step": 7015 }, { "epoch": 2.3946065881549754, "grad_norm": 0.2344309576704437, "learning_rate": 1.1203844208396561e-05, "loss": 0.2293, "num_tokens": 1338245353.0, "step": 7016 }, { "epoch": 2.39494794333504, "grad_norm": 0.22484145003211453, "learning_rate": 1.1197521497218008e-05, "loss": 0.2234, "num_tokens": 1338447686.0, "step": 7017 }, { "epoch": 2.395289298515105, "grad_norm": 0.22952797722878343, "learning_rate": 1.1191198786039455e-05, "loss": 0.2181, "num_tokens": 1338617868.0, "step": 7018 }, { "epoch": 2.3956306536951697, "grad_norm": 0.23947093407971518, "learning_rate": 1.11848760748609e-05, "loss": 0.2411, "num_tokens": 1338821412.0, "step": 7019 }, { "epoch": 2.3959720088752348, "grad_norm": 0.242116603784682, "learning_rate": 1.1178553363682349e-05, "loss": 0.2391, "num_tokens": 1339015812.0, "step": 7020 }, { "epoch": 2.3963133640552994, "grad_norm": 0.2515234640802437, "learning_rate": 1.1172230652503794e-05, "loss": 0.2378, "num_tokens": 1339188533.0, "step": 7021 }, { "epoch": 2.3966547192353644, "grad_norm": 0.23494705012134715, "learning_rate": 1.1165907941325241e-05, "loss": 0.2281, "num_tokens": 1339385285.0, "step": 7022 }, { "epoch": 2.396996074415429, "grad_norm": 0.22245427727604075, "learning_rate": 1.1159585230146688e-05, "loss": 0.2378, "num_tokens": 1339609368.0, "step": 7023 }, { "epoch": 2.397337429595494, "grad_norm": 0.23258367641085392, "learning_rate": 1.1153262518968133e-05, "loss": 0.2435, "num_tokens": 1339826987.0, "step": 7024 }, { "epoch": 2.397678784775559, "grad_norm": 0.21832247230043925, "learning_rate": 1.114693980778958e-05, "loss": 0.2306, "num_tokens": 1340064127.0, "step": 7025 }, { "epoch": 2.398020139955624, "grad_norm": 0.21995617566052433, "learning_rate": 1.1140617096611027e-05, "loss": 0.2414, "num_tokens": 1340280146.0, "step": 7026 }, { "epoch": 2.398361495135689, "grad_norm": 0.2685927505709504, "learning_rate": 1.1134294385432474e-05, "loss": 0.206, "num_tokens": 1340418967.0, "step": 7027 }, { "epoch": 2.3987028503157535, "grad_norm": 0.23860176556785714, "learning_rate": 1.112797167425392e-05, "loss": 0.2144, "num_tokens": 1340593671.0, "step": 7028 }, { "epoch": 2.3990442054958185, "grad_norm": 0.24109102748156727, "learning_rate": 1.1121648963075368e-05, "loss": 0.2237, "num_tokens": 1340793981.0, "step": 7029 }, { "epoch": 2.399385560675883, "grad_norm": 0.24974364683790592, "learning_rate": 1.1115326251896814e-05, "loss": 0.2334, "num_tokens": 1340966122.0, "step": 7030 }, { "epoch": 2.399726915855948, "grad_norm": 0.2568619565990182, "learning_rate": 1.110900354071826e-05, "loss": 0.2419, "num_tokens": 1341127857.0, "step": 7031 }, { "epoch": 2.400068271036013, "grad_norm": 0.2379229893102302, "learning_rate": 1.1102680829539708e-05, "loss": 0.244, "num_tokens": 1341345044.0, "step": 7032 }, { "epoch": 2.400409626216078, "grad_norm": 0.2391828170076069, "learning_rate": 1.1096358118361155e-05, "loss": 0.2321, "num_tokens": 1341532596.0, "step": 7033 }, { "epoch": 2.4007509813961425, "grad_norm": 0.21973282187587462, "learning_rate": 1.10900354071826e-05, "loss": 0.2537, "num_tokens": 1341753239.0, "step": 7034 }, { "epoch": 2.4010923365762076, "grad_norm": 0.29625200043694117, "learning_rate": 1.1083712696004047e-05, "loss": 0.2192, "num_tokens": 1341917277.0, "step": 7035 }, { "epoch": 2.4014336917562726, "grad_norm": 0.24248956587556655, "learning_rate": 1.1077389984825494e-05, "loss": 0.2542, "num_tokens": 1342136174.0, "step": 7036 }, { "epoch": 2.4017750469363373, "grad_norm": 0.24770377160228843, "learning_rate": 1.107106727364694e-05, "loss": 0.2095, "num_tokens": 1342297062.0, "step": 7037 }, { "epoch": 2.402116402116402, "grad_norm": 0.23983657549182813, "learning_rate": 1.1064744562468388e-05, "loss": 0.2357, "num_tokens": 1342520184.0, "step": 7038 }, { "epoch": 2.402457757296467, "grad_norm": 0.257957193518687, "learning_rate": 1.1058421851289833e-05, "loss": 0.2165, "num_tokens": 1342686441.0, "step": 7039 }, { "epoch": 2.402799112476532, "grad_norm": 0.2897280180473594, "learning_rate": 1.105209914011128e-05, "loss": 0.2229, "num_tokens": 1342825238.0, "step": 7040 }, { "epoch": 2.4031404676565966, "grad_norm": 0.22772726291461443, "learning_rate": 1.1045776428932727e-05, "loss": 0.2294, "num_tokens": 1343040302.0, "step": 7041 }, { "epoch": 2.4034818228366617, "grad_norm": 0.22076720355088902, "learning_rate": 1.1039453717754174e-05, "loss": 0.237, "num_tokens": 1343252052.0, "step": 7042 }, { "epoch": 2.4038231780167263, "grad_norm": 0.24629610634494356, "learning_rate": 1.103313100657562e-05, "loss": 0.243, "num_tokens": 1343460888.0, "step": 7043 }, { "epoch": 2.4041645331967914, "grad_norm": 0.23292065451114122, "learning_rate": 1.1026808295397066e-05, "loss": 0.2216, "num_tokens": 1343639886.0, "step": 7044 }, { "epoch": 2.404505888376856, "grad_norm": 0.24587207649640527, "learning_rate": 1.1020485584218513e-05, "loss": 0.2396, "num_tokens": 1343840590.0, "step": 7045 }, { "epoch": 2.404847243556921, "grad_norm": 0.2284071702556241, "learning_rate": 1.101416287303996e-05, "loss": 0.2281, "num_tokens": 1344050226.0, "step": 7046 }, { "epoch": 2.4051885987369857, "grad_norm": 0.2187035490397753, "learning_rate": 1.1007840161861406e-05, "loss": 0.2255, "num_tokens": 1344256972.0, "step": 7047 }, { "epoch": 2.4055299539170507, "grad_norm": 0.21551057786387806, "learning_rate": 1.1001517450682854e-05, "loss": 0.2565, "num_tokens": 1344507200.0, "step": 7048 }, { "epoch": 2.4058713090971153, "grad_norm": 0.22782643973592792, "learning_rate": 1.09951947395043e-05, "loss": 0.221, "num_tokens": 1344693654.0, "step": 7049 }, { "epoch": 2.4062126642771804, "grad_norm": 0.2774208967039154, "learning_rate": 1.0988872028325747e-05, "loss": 0.2288, "num_tokens": 1344861334.0, "step": 7050 }, { "epoch": 2.4065540194572455, "grad_norm": 0.2574420483067936, "learning_rate": 1.0982549317147194e-05, "loss": 0.2387, "num_tokens": 1345036352.0, "step": 7051 }, { "epoch": 2.40689537463731, "grad_norm": 0.23350738229044582, "learning_rate": 1.097622660596864e-05, "loss": 0.2299, "num_tokens": 1345243777.0, "step": 7052 }, { "epoch": 2.407236729817375, "grad_norm": 0.25083714679225616, "learning_rate": 1.0969903894790086e-05, "loss": 0.2375, "num_tokens": 1345409539.0, "step": 7053 }, { "epoch": 2.4075780849974397, "grad_norm": 0.23318002027279092, "learning_rate": 1.0963581183611533e-05, "loss": 0.2352, "num_tokens": 1345611582.0, "step": 7054 }, { "epoch": 2.407919440177505, "grad_norm": 0.24492636215752692, "learning_rate": 1.095725847243298e-05, "loss": 0.2167, "num_tokens": 1345793366.0, "step": 7055 }, { "epoch": 2.4082607953575694, "grad_norm": 0.22048081348967202, "learning_rate": 1.0950935761254425e-05, "loss": 0.2154, "num_tokens": 1345989570.0, "step": 7056 }, { "epoch": 2.4086021505376345, "grad_norm": 0.2299602974116801, "learning_rate": 1.0944613050075874e-05, "loss": 0.2203, "num_tokens": 1346177392.0, "step": 7057 }, { "epoch": 2.408943505717699, "grad_norm": 0.252733653440639, "learning_rate": 1.0938290338897319e-05, "loss": 0.2366, "num_tokens": 1346363051.0, "step": 7058 }, { "epoch": 2.409284860897764, "grad_norm": 0.22281432356584593, "learning_rate": 1.0931967627718766e-05, "loss": 0.2361, "num_tokens": 1346566902.0, "step": 7059 }, { "epoch": 2.409626216077829, "grad_norm": 0.27060750549756657, "learning_rate": 1.0925644916540213e-05, "loss": 0.2535, "num_tokens": 1346758420.0, "step": 7060 }, { "epoch": 2.409967571257894, "grad_norm": 0.2023987279538912, "learning_rate": 1.091932220536166e-05, "loss": 0.2391, "num_tokens": 1347022998.0, "step": 7061 }, { "epoch": 2.410308926437959, "grad_norm": 0.23227104555918532, "learning_rate": 1.0912999494183105e-05, "loss": 0.2034, "num_tokens": 1347200432.0, "step": 7062 }, { "epoch": 2.4106502816180235, "grad_norm": 0.2670630644021473, "learning_rate": 1.0906676783004554e-05, "loss": 0.2332, "num_tokens": 1347379598.0, "step": 7063 }, { "epoch": 2.4109916367980886, "grad_norm": 0.197349012630251, "learning_rate": 1.0900354071826e-05, "loss": 0.2426, "num_tokens": 1347643625.0, "step": 7064 }, { "epoch": 2.411332991978153, "grad_norm": 0.21682005016280428, "learning_rate": 1.0894031360647446e-05, "loss": 0.2378, "num_tokens": 1347852473.0, "step": 7065 }, { "epoch": 2.4116743471582183, "grad_norm": 0.2425830958026398, "learning_rate": 1.0887708649468893e-05, "loss": 0.2223, "num_tokens": 1348030836.0, "step": 7066 }, { "epoch": 2.412015702338283, "grad_norm": 0.23048773328821798, "learning_rate": 1.088138593829034e-05, "loss": 0.2152, "num_tokens": 1348224374.0, "step": 7067 }, { "epoch": 2.412357057518348, "grad_norm": 0.23900919671000023, "learning_rate": 1.0875063227111785e-05, "loss": 0.1878, "num_tokens": 1348392932.0, "step": 7068 }, { "epoch": 2.4126984126984126, "grad_norm": 0.24717947469088938, "learning_rate": 1.0868740515933232e-05, "loss": 0.2233, "num_tokens": 1348575600.0, "step": 7069 }, { "epoch": 2.4130397678784776, "grad_norm": 0.2694852820379434, "learning_rate": 1.086241780475468e-05, "loss": 0.219, "num_tokens": 1348761366.0, "step": 7070 }, { "epoch": 2.4133811230585422, "grad_norm": 0.25038826826484567, "learning_rate": 1.0856095093576125e-05, "loss": 0.2305, "num_tokens": 1348962579.0, "step": 7071 }, { "epoch": 2.4137224782386073, "grad_norm": 0.22841785425349462, "learning_rate": 1.0849772382397573e-05, "loss": 0.2413, "num_tokens": 1349160893.0, "step": 7072 }, { "epoch": 2.4140638334186724, "grad_norm": 0.2250762104885061, "learning_rate": 1.0843449671219019e-05, "loss": 0.2234, "num_tokens": 1349345501.0, "step": 7073 }, { "epoch": 2.414405188598737, "grad_norm": 0.25245626290826445, "learning_rate": 1.0837126960040466e-05, "loss": 0.2112, "num_tokens": 1349516206.0, "step": 7074 }, { "epoch": 2.4147465437788016, "grad_norm": 0.22564499780810304, "learning_rate": 1.0830804248861913e-05, "loss": 0.2502, "num_tokens": 1349728465.0, "step": 7075 }, { "epoch": 2.4150878989588667, "grad_norm": 0.24782934910995638, "learning_rate": 1.082448153768336e-05, "loss": 0.2145, "num_tokens": 1349888971.0, "step": 7076 }, { "epoch": 2.4154292541389317, "grad_norm": 0.22586270663703692, "learning_rate": 1.0818158826504805e-05, "loss": 0.2485, "num_tokens": 1350117395.0, "step": 7077 }, { "epoch": 2.4157706093189963, "grad_norm": 0.2425523979137725, "learning_rate": 1.0811836115326254e-05, "loss": 0.2328, "num_tokens": 1350296169.0, "step": 7078 }, { "epoch": 2.4161119644990614, "grad_norm": 0.23461636143540068, "learning_rate": 1.0805513404147699e-05, "loss": 0.2063, "num_tokens": 1350474424.0, "step": 7079 }, { "epoch": 2.416453319679126, "grad_norm": 0.24867544366621513, "learning_rate": 1.0799190692969146e-05, "loss": 0.2201, "num_tokens": 1350655015.0, "step": 7080 }, { "epoch": 2.416794674859191, "grad_norm": 0.24402593659818664, "learning_rate": 1.0792867981790593e-05, "loss": 0.2288, "num_tokens": 1350837721.0, "step": 7081 }, { "epoch": 2.4171360300392557, "grad_norm": 0.2627842471459222, "learning_rate": 1.078654527061204e-05, "loss": 0.2405, "num_tokens": 1350999902.0, "step": 7082 }, { "epoch": 2.4174773852193208, "grad_norm": 0.2357337864086971, "learning_rate": 1.0780222559433485e-05, "loss": 0.2312, "num_tokens": 1351188285.0, "step": 7083 }, { "epoch": 2.4178187403993854, "grad_norm": 0.23605720220363155, "learning_rate": 1.0773899848254932e-05, "loss": 0.2162, "num_tokens": 1351350673.0, "step": 7084 }, { "epoch": 2.4181600955794504, "grad_norm": 0.24332439440079331, "learning_rate": 1.0767577137076379e-05, "loss": 0.2234, "num_tokens": 1351531461.0, "step": 7085 }, { "epoch": 2.418501450759515, "grad_norm": 0.23303162273454023, "learning_rate": 1.0761254425897824e-05, "loss": 0.2272, "num_tokens": 1351738474.0, "step": 7086 }, { "epoch": 2.41884280593958, "grad_norm": 0.2388997233865487, "learning_rate": 1.0754931714719273e-05, "loss": 0.2314, "num_tokens": 1351926634.0, "step": 7087 }, { "epoch": 2.419184161119645, "grad_norm": 0.23797730089646446, "learning_rate": 1.0748609003540718e-05, "loss": 0.2251, "num_tokens": 1352126582.0, "step": 7088 }, { "epoch": 2.41952551629971, "grad_norm": 0.2944359199066287, "learning_rate": 1.0742286292362165e-05, "loss": 0.2452, "num_tokens": 1352314881.0, "step": 7089 }, { "epoch": 2.419866871479775, "grad_norm": 0.2219542758506217, "learning_rate": 1.0735963581183612e-05, "loss": 0.2415, "num_tokens": 1352542660.0, "step": 7090 }, { "epoch": 2.4202082266598395, "grad_norm": 0.23557241685257435, "learning_rate": 1.072964087000506e-05, "loss": 0.2236, "num_tokens": 1352747103.0, "step": 7091 }, { "epoch": 2.4205495818399045, "grad_norm": 0.24353007018437872, "learning_rate": 1.0723318158826505e-05, "loss": 0.2492, "num_tokens": 1352940974.0, "step": 7092 }, { "epoch": 2.420890937019969, "grad_norm": 0.24360610860329412, "learning_rate": 1.0716995447647953e-05, "loss": 0.2134, "num_tokens": 1353116979.0, "step": 7093 }, { "epoch": 2.421232292200034, "grad_norm": 0.22770903560212694, "learning_rate": 1.0710672736469399e-05, "loss": 0.2218, "num_tokens": 1353292636.0, "step": 7094 }, { "epoch": 2.421573647380099, "grad_norm": 0.2708311907731864, "learning_rate": 1.0704350025290846e-05, "loss": 0.2616, "num_tokens": 1353486357.0, "step": 7095 }, { "epoch": 2.421915002560164, "grad_norm": 0.2507383231964194, "learning_rate": 1.0698027314112293e-05, "loss": 0.2267, "num_tokens": 1353637644.0, "step": 7096 }, { "epoch": 2.4222563577402285, "grad_norm": 0.20696384150584157, "learning_rate": 1.069170460293374e-05, "loss": 0.2377, "num_tokens": 1353864723.0, "step": 7097 }, { "epoch": 2.4225977129202936, "grad_norm": 0.24400684783640408, "learning_rate": 1.0685381891755185e-05, "loss": 0.2466, "num_tokens": 1354044804.0, "step": 7098 }, { "epoch": 2.4229390681003586, "grad_norm": 0.22806895334445945, "learning_rate": 1.0679059180576632e-05, "loss": 0.2348, "num_tokens": 1354242918.0, "step": 7099 }, { "epoch": 2.4232804232804233, "grad_norm": 0.26431584370215955, "learning_rate": 1.0672736469398079e-05, "loss": 0.205, "num_tokens": 1354405055.0, "step": 7100 }, { "epoch": 2.4236217784604883, "grad_norm": 0.24739703893411105, "learning_rate": 1.0666413758219524e-05, "loss": 0.217, "num_tokens": 1354571223.0, "step": 7101 }, { "epoch": 2.423963133640553, "grad_norm": 0.22411104415872876, "learning_rate": 1.0660091047040973e-05, "loss": 0.2403, "num_tokens": 1354795645.0, "step": 7102 }, { "epoch": 2.424304488820618, "grad_norm": 0.26192167072077355, "learning_rate": 1.0653768335862418e-05, "loss": 0.2247, "num_tokens": 1354952550.0, "step": 7103 }, { "epoch": 2.4246458440006826, "grad_norm": 0.25791454136801584, "learning_rate": 1.0647445624683865e-05, "loss": 0.2477, "num_tokens": 1355168815.0, "step": 7104 }, { "epoch": 2.4249871991807477, "grad_norm": 0.2580830904974632, "learning_rate": 1.0641122913505312e-05, "loss": 0.2248, "num_tokens": 1355321505.0, "step": 7105 }, { "epoch": 2.4253285543608123, "grad_norm": 0.22061103655991487, "learning_rate": 1.0634800202326759e-05, "loss": 0.2157, "num_tokens": 1355510774.0, "step": 7106 }, { "epoch": 2.4256699095408774, "grad_norm": 0.24494699589381133, "learning_rate": 1.0628477491148204e-05, "loss": 0.2262, "num_tokens": 1355699750.0, "step": 7107 }, { "epoch": 2.426011264720942, "grad_norm": 0.2464433125347164, "learning_rate": 1.0622154779969651e-05, "loss": 0.2216, "num_tokens": 1355893593.0, "step": 7108 }, { "epoch": 2.426352619901007, "grad_norm": 0.21685284454488665, "learning_rate": 1.0615832068791098e-05, "loss": 0.2183, "num_tokens": 1356094888.0, "step": 7109 }, { "epoch": 2.426693975081072, "grad_norm": 0.2288756094014203, "learning_rate": 1.0609509357612545e-05, "loss": 0.2071, "num_tokens": 1356269947.0, "step": 7110 }, { "epoch": 2.4270353302611367, "grad_norm": 0.2550803284489449, "learning_rate": 1.060318664643399e-05, "loss": 0.2266, "num_tokens": 1356456397.0, "step": 7111 }, { "epoch": 2.4273766854412013, "grad_norm": 0.23246102107467345, "learning_rate": 1.0596863935255437e-05, "loss": 0.2422, "num_tokens": 1356659800.0, "step": 7112 }, { "epoch": 2.4277180406212664, "grad_norm": 0.22130785919983534, "learning_rate": 1.0590541224076884e-05, "loss": 0.2467, "num_tokens": 1356876995.0, "step": 7113 }, { "epoch": 2.4280593958013315, "grad_norm": 0.2352796024618593, "learning_rate": 1.0584218512898331e-05, "loss": 0.2265, "num_tokens": 1357078945.0, "step": 7114 }, { "epoch": 2.428400750981396, "grad_norm": 0.2545610236471479, "learning_rate": 1.0577895801719778e-05, "loss": 0.2404, "num_tokens": 1357307421.0, "step": 7115 }, { "epoch": 2.428742106161461, "grad_norm": 0.24777207988542754, "learning_rate": 1.0571573090541224e-05, "loss": 0.2172, "num_tokens": 1357481934.0, "step": 7116 }, { "epoch": 2.4290834613415258, "grad_norm": 0.21670683638532143, "learning_rate": 1.056525037936267e-05, "loss": 0.2276, "num_tokens": 1357693030.0, "step": 7117 }, { "epoch": 2.429424816521591, "grad_norm": 0.2361697669077394, "learning_rate": 1.0558927668184118e-05, "loss": 0.2269, "num_tokens": 1357876401.0, "step": 7118 }, { "epoch": 2.4297661717016554, "grad_norm": 0.21557715589626023, "learning_rate": 1.0552604957005565e-05, "loss": 0.2331, "num_tokens": 1358103868.0, "step": 7119 }, { "epoch": 2.4301075268817205, "grad_norm": 0.2417908663670278, "learning_rate": 1.054628224582701e-05, "loss": 0.2357, "num_tokens": 1358299873.0, "step": 7120 }, { "epoch": 2.430448882061785, "grad_norm": 0.24174454370912424, "learning_rate": 1.0539959534648459e-05, "loss": 0.2449, "num_tokens": 1358500997.0, "step": 7121 }, { "epoch": 2.43079023724185, "grad_norm": 0.24837565422192667, "learning_rate": 1.0533636823469904e-05, "loss": 0.2268, "num_tokens": 1358669158.0, "step": 7122 }, { "epoch": 2.431131592421915, "grad_norm": 0.21634113019538717, "learning_rate": 1.0527314112291351e-05, "loss": 0.2441, "num_tokens": 1358899502.0, "step": 7123 }, { "epoch": 2.43147294760198, "grad_norm": 0.24511208101869433, "learning_rate": 1.0520991401112798e-05, "loss": 0.2543, "num_tokens": 1359081624.0, "step": 7124 }, { "epoch": 2.431814302782045, "grad_norm": 0.22605814957411424, "learning_rate": 1.0514668689934245e-05, "loss": 0.2178, "num_tokens": 1359273738.0, "step": 7125 }, { "epoch": 2.4321556579621095, "grad_norm": 0.2321415491003481, "learning_rate": 1.050834597875569e-05, "loss": 0.201, "num_tokens": 1359456229.0, "step": 7126 }, { "epoch": 2.4324970131421746, "grad_norm": 0.2200950008359297, "learning_rate": 1.0502023267577137e-05, "loss": 0.2278, "num_tokens": 1359685879.0, "step": 7127 }, { "epoch": 2.432838368322239, "grad_norm": 0.24545381190720028, "learning_rate": 1.0495700556398584e-05, "loss": 0.2256, "num_tokens": 1359868397.0, "step": 7128 }, { "epoch": 2.4331797235023043, "grad_norm": 0.23928434945847316, "learning_rate": 1.0489377845220031e-05, "loss": 0.2231, "num_tokens": 1360062662.0, "step": 7129 }, { "epoch": 2.433521078682369, "grad_norm": 0.2326532470604043, "learning_rate": 1.0483055134041478e-05, "loss": 0.2199, "num_tokens": 1360240224.0, "step": 7130 }, { "epoch": 2.433862433862434, "grad_norm": 0.24083417858545184, "learning_rate": 1.0476732422862923e-05, "loss": 0.2114, "num_tokens": 1360395131.0, "step": 7131 }, { "epoch": 2.4342037890424986, "grad_norm": 0.24247304316945992, "learning_rate": 1.047040971168437e-05, "loss": 0.2268, "num_tokens": 1360594264.0, "step": 7132 }, { "epoch": 2.4345451442225636, "grad_norm": 0.25108372445074284, "learning_rate": 1.0464087000505817e-05, "loss": 0.2172, "num_tokens": 1360784439.0, "step": 7133 }, { "epoch": 2.4348864994026282, "grad_norm": 0.22059911088239648, "learning_rate": 1.0457764289327264e-05, "loss": 0.238, "num_tokens": 1360996860.0, "step": 7134 }, { "epoch": 2.4352278545826933, "grad_norm": 0.24932487749011104, "learning_rate": 1.045144157814871e-05, "loss": 0.2249, "num_tokens": 1361161342.0, "step": 7135 }, { "epoch": 2.4355692097627584, "grad_norm": 0.22926251641653483, "learning_rate": 1.0445118866970158e-05, "loss": 0.2378, "num_tokens": 1361365531.0, "step": 7136 }, { "epoch": 2.435910564942823, "grad_norm": 0.21557088042947303, "learning_rate": 1.0438796155791604e-05, "loss": 0.2307, "num_tokens": 1361586476.0, "step": 7137 }, { "epoch": 2.436251920122888, "grad_norm": 0.23266951434773028, "learning_rate": 1.043247344461305e-05, "loss": 0.2436, "num_tokens": 1361802450.0, "step": 7138 }, { "epoch": 2.4365932753029527, "grad_norm": 0.252585613536116, "learning_rate": 1.0426150733434498e-05, "loss": 0.2151, "num_tokens": 1361984973.0, "step": 7139 }, { "epoch": 2.4369346304830177, "grad_norm": 0.2365307030152271, "learning_rate": 1.0419828022255945e-05, "loss": 0.2409, "num_tokens": 1362182922.0, "step": 7140 }, { "epoch": 2.4372759856630823, "grad_norm": 0.21101531490541728, "learning_rate": 1.041350531107739e-05, "loss": 0.2261, "num_tokens": 1362408621.0, "step": 7141 }, { "epoch": 2.4376173408431474, "grad_norm": 0.2328895981343125, "learning_rate": 1.0407182599898837e-05, "loss": 0.2187, "num_tokens": 1362594652.0, "step": 7142 }, { "epoch": 2.437958696023212, "grad_norm": 0.2806349197435599, "learning_rate": 1.0400859888720284e-05, "loss": 0.1961, "num_tokens": 1362731781.0, "step": 7143 }, { "epoch": 2.438300051203277, "grad_norm": 0.25199859560470433, "learning_rate": 1.039453717754173e-05, "loss": 0.2251, "num_tokens": 1362898454.0, "step": 7144 }, { "epoch": 2.4386414063833417, "grad_norm": 0.21975670459555052, "learning_rate": 1.0388214466363178e-05, "loss": 0.23, "num_tokens": 1363109022.0, "step": 7145 }, { "epoch": 2.4389827615634068, "grad_norm": 0.24225419156037037, "learning_rate": 1.0381891755184623e-05, "loss": 0.246, "num_tokens": 1363339850.0, "step": 7146 }, { "epoch": 2.439324116743472, "grad_norm": 0.24081077858651706, "learning_rate": 1.037556904400607e-05, "loss": 0.1955, "num_tokens": 1363512077.0, "step": 7147 }, { "epoch": 2.4396654719235364, "grad_norm": 0.24928541835105894, "learning_rate": 1.0369246332827517e-05, "loss": 0.2567, "num_tokens": 1363719742.0, "step": 7148 }, { "epoch": 2.440006827103601, "grad_norm": 0.23398156110594537, "learning_rate": 1.0362923621648964e-05, "loss": 0.2255, "num_tokens": 1363914169.0, "step": 7149 }, { "epoch": 2.440348182283666, "grad_norm": 0.23160728725763008, "learning_rate": 1.035660091047041e-05, "loss": 0.2396, "num_tokens": 1364109136.0, "step": 7150 }, { "epoch": 2.440689537463731, "grad_norm": 0.248967266071646, "learning_rate": 1.0350278199291858e-05, "loss": 0.2254, "num_tokens": 1364279274.0, "step": 7151 }, { "epoch": 2.441030892643796, "grad_norm": 0.24668837926920695, "learning_rate": 1.0343955488113303e-05, "loss": 0.2343, "num_tokens": 1364487625.0, "step": 7152 }, { "epoch": 2.441372247823861, "grad_norm": 0.22763604214200248, "learning_rate": 1.033763277693475e-05, "loss": 0.2581, "num_tokens": 1364727675.0, "step": 7153 }, { "epoch": 2.4417136030039255, "grad_norm": 0.2511908874682312, "learning_rate": 1.0331310065756197e-05, "loss": 0.2199, "num_tokens": 1364918366.0, "step": 7154 }, { "epoch": 2.4420549581839905, "grad_norm": 0.22917866727001, "learning_rate": 1.0324987354577644e-05, "loss": 0.2281, "num_tokens": 1365118901.0, "step": 7155 }, { "epoch": 2.442396313364055, "grad_norm": 0.21946671581274896, "learning_rate": 1.031866464339909e-05, "loss": 0.2296, "num_tokens": 1365336148.0, "step": 7156 }, { "epoch": 2.44273766854412, "grad_norm": 0.21770439824939455, "learning_rate": 1.0312341932220536e-05, "loss": 0.2189, "num_tokens": 1365535179.0, "step": 7157 }, { "epoch": 2.443079023724185, "grad_norm": 0.2324652195438431, "learning_rate": 1.0306019221041983e-05, "loss": 0.2052, "num_tokens": 1365697301.0, "step": 7158 }, { "epoch": 2.44342037890425, "grad_norm": 0.2294434796491031, "learning_rate": 1.029969650986343e-05, "loss": 0.2412, "num_tokens": 1365897335.0, "step": 7159 }, { "epoch": 2.4437617340843145, "grad_norm": 0.23606519932253986, "learning_rate": 1.0293373798684877e-05, "loss": 0.2518, "num_tokens": 1366099052.0, "step": 7160 }, { "epoch": 2.4441030892643796, "grad_norm": 0.2546904003871889, "learning_rate": 1.0287051087506323e-05, "loss": 0.216, "num_tokens": 1366276040.0, "step": 7161 }, { "epoch": 2.4444444444444446, "grad_norm": 0.23969637881057587, "learning_rate": 1.028072837632777e-05, "loss": 0.2364, "num_tokens": 1366461526.0, "step": 7162 }, { "epoch": 2.4447857996245093, "grad_norm": 0.24613844347374997, "learning_rate": 1.0274405665149217e-05, "loss": 0.2182, "num_tokens": 1366640583.0, "step": 7163 }, { "epoch": 2.4451271548045743, "grad_norm": 0.2577790441197805, "learning_rate": 1.0268082953970664e-05, "loss": 0.2361, "num_tokens": 1366817959.0, "step": 7164 }, { "epoch": 2.445468509984639, "grad_norm": 0.2545477830403439, "learning_rate": 1.0261760242792109e-05, "loss": 0.2319, "num_tokens": 1367008871.0, "step": 7165 }, { "epoch": 2.445809865164704, "grad_norm": 0.22105284796757005, "learning_rate": 1.0255437531613558e-05, "loss": 0.2553, "num_tokens": 1367225452.0, "step": 7166 }, { "epoch": 2.4461512203447686, "grad_norm": 0.2488407401267722, "learning_rate": 1.0249114820435003e-05, "loss": 0.2266, "num_tokens": 1367414927.0, "step": 7167 }, { "epoch": 2.4464925755248337, "grad_norm": 0.23110743268402145, "learning_rate": 1.024279210925645e-05, "loss": 0.2266, "num_tokens": 1367619940.0, "step": 7168 }, { "epoch": 2.4468339307048983, "grad_norm": 0.2272942339446011, "learning_rate": 1.0236469398077897e-05, "loss": 0.2459, "num_tokens": 1367833479.0, "step": 7169 }, { "epoch": 2.4471752858849634, "grad_norm": 0.21928783058865672, "learning_rate": 1.0230146686899344e-05, "loss": 0.2288, "num_tokens": 1368051136.0, "step": 7170 }, { "epoch": 2.447516641065028, "grad_norm": 0.2507266549645827, "learning_rate": 1.0223823975720789e-05, "loss": 0.2047, "num_tokens": 1368219411.0, "step": 7171 }, { "epoch": 2.447857996245093, "grad_norm": 0.23611264269845442, "learning_rate": 1.0217501264542236e-05, "loss": 0.2161, "num_tokens": 1368392533.0, "step": 7172 }, { "epoch": 2.448199351425158, "grad_norm": 0.24809829243358145, "learning_rate": 1.0211178553363683e-05, "loss": 0.1916, "num_tokens": 1368535237.0, "step": 7173 }, { "epoch": 2.4485407066052227, "grad_norm": 0.240859675754524, "learning_rate": 1.020485584218513e-05, "loss": 0.2197, "num_tokens": 1368707377.0, "step": 7174 }, { "epoch": 2.448882061785288, "grad_norm": 0.2506566113137714, "learning_rate": 1.0198533131006575e-05, "loss": 0.214, "num_tokens": 1368906985.0, "step": 7175 }, { "epoch": 2.4492234169653524, "grad_norm": 0.23847524263755177, "learning_rate": 1.0192210419828022e-05, "loss": 0.2294, "num_tokens": 1369135680.0, "step": 7176 }, { "epoch": 2.4495647721454175, "grad_norm": 0.25864599186720133, "learning_rate": 1.018588770864947e-05, "loss": 0.2218, "num_tokens": 1369309717.0, "step": 7177 }, { "epoch": 2.449906127325482, "grad_norm": 0.22217355675529585, "learning_rate": 1.0179564997470915e-05, "loss": 0.2209, "num_tokens": 1369522629.0, "step": 7178 }, { "epoch": 2.450247482505547, "grad_norm": 0.2515318135518997, "learning_rate": 1.0173242286292363e-05, "loss": 0.251, "num_tokens": 1369720882.0, "step": 7179 }, { "epoch": 2.4505888376856118, "grad_norm": 0.25669259584433696, "learning_rate": 1.0166919575113809e-05, "loss": 0.2278, "num_tokens": 1369895013.0, "step": 7180 }, { "epoch": 2.450930192865677, "grad_norm": 0.2387876201730112, "learning_rate": 1.0160596863935256e-05, "loss": 0.2148, "num_tokens": 1370089430.0, "step": 7181 }, { "epoch": 2.4512715480457414, "grad_norm": 0.23607927892045077, "learning_rate": 1.0154274152756703e-05, "loss": 0.2117, "num_tokens": 1370264905.0, "step": 7182 }, { "epoch": 2.4516129032258065, "grad_norm": 0.2129890890410378, "learning_rate": 1.014795144157815e-05, "loss": 0.2139, "num_tokens": 1370475324.0, "step": 7183 }, { "epoch": 2.4519542584058716, "grad_norm": 0.2256822529005301, "learning_rate": 1.0141628730399595e-05, "loss": 0.2245, "num_tokens": 1370661910.0, "step": 7184 }, { "epoch": 2.452295613585936, "grad_norm": 0.21623763547473882, "learning_rate": 1.0135306019221044e-05, "loss": 0.2561, "num_tokens": 1370907044.0, "step": 7185 }, { "epoch": 2.452636968766001, "grad_norm": 0.24347973826309321, "learning_rate": 1.0128983308042489e-05, "loss": 0.223, "num_tokens": 1371096332.0, "step": 7186 }, { "epoch": 2.452978323946066, "grad_norm": 0.2436793973271542, "learning_rate": 1.0122660596863936e-05, "loss": 0.215, "num_tokens": 1371275555.0, "step": 7187 }, { "epoch": 2.453319679126131, "grad_norm": 0.23193415142843973, "learning_rate": 1.0116337885685383e-05, "loss": 0.2298, "num_tokens": 1371456191.0, "step": 7188 }, { "epoch": 2.4536610343061955, "grad_norm": 0.2523472883536546, "learning_rate": 1.0110015174506828e-05, "loss": 0.2339, "num_tokens": 1371648321.0, "step": 7189 }, { "epoch": 2.4540023894862606, "grad_norm": 0.20900006095789359, "learning_rate": 1.0103692463328275e-05, "loss": 0.2544, "num_tokens": 1371899205.0, "step": 7190 }, { "epoch": 2.454343744666325, "grad_norm": 0.23342954615219885, "learning_rate": 1.0097369752149722e-05, "loss": 0.211, "num_tokens": 1372070373.0, "step": 7191 }, { "epoch": 2.4546850998463903, "grad_norm": 0.25872222305900167, "learning_rate": 1.0091047040971169e-05, "loss": 0.2208, "num_tokens": 1372243520.0, "step": 7192 }, { "epoch": 2.455026455026455, "grad_norm": 0.2578529464634885, "learning_rate": 1.0084724329792614e-05, "loss": 0.2185, "num_tokens": 1372401533.0, "step": 7193 }, { "epoch": 2.45536781020652, "grad_norm": 0.23491381372180287, "learning_rate": 1.0078401618614063e-05, "loss": 0.2127, "num_tokens": 1372561178.0, "step": 7194 }, { "epoch": 2.4557091653865846, "grad_norm": 0.2490714355481211, "learning_rate": 1.0072078907435508e-05, "loss": 0.2308, "num_tokens": 1372738850.0, "step": 7195 }, { "epoch": 2.4560505205666496, "grad_norm": 0.2384132916454865, "learning_rate": 1.0065756196256955e-05, "loss": 0.2218, "num_tokens": 1372911944.0, "step": 7196 }, { "epoch": 2.4563918757467142, "grad_norm": 0.2429811007563664, "learning_rate": 1.0059433485078402e-05, "loss": 0.2491, "num_tokens": 1373099949.0, "step": 7197 }, { "epoch": 2.4567332309267793, "grad_norm": 0.24379348173098817, "learning_rate": 1.005311077389985e-05, "loss": 0.2141, "num_tokens": 1373259898.0, "step": 7198 }, { "epoch": 2.4570745861068444, "grad_norm": 0.20977342577618116, "learning_rate": 1.0046788062721294e-05, "loss": 0.2342, "num_tokens": 1373493886.0, "step": 7199 }, { "epoch": 2.457415941286909, "grad_norm": 0.24800267553167143, "learning_rate": 1.0040465351542743e-05, "loss": 0.2258, "num_tokens": 1373667049.0, "step": 7200 }, { "epoch": 2.457757296466974, "grad_norm": 0.2262862406992079, "learning_rate": 1.0034142640364188e-05, "loss": 0.2225, "num_tokens": 1373872710.0, "step": 7201 }, { "epoch": 2.4580986516470387, "grad_norm": 0.24710653967407345, "learning_rate": 1.0027819929185635e-05, "loss": 0.2172, "num_tokens": 1374044842.0, "step": 7202 }, { "epoch": 2.4584400068271037, "grad_norm": 0.2542091105289461, "learning_rate": 1.0021497218007082e-05, "loss": 0.2182, "num_tokens": 1374205915.0, "step": 7203 }, { "epoch": 2.4587813620071683, "grad_norm": 0.2510655533020399, "learning_rate": 1.0015174506828528e-05, "loss": 0.2256, "num_tokens": 1374388728.0, "step": 7204 }, { "epoch": 2.4591227171872334, "grad_norm": 0.255820292704408, "learning_rate": 1.0008851795649975e-05, "loss": 0.2168, "num_tokens": 1374541483.0, "step": 7205 }, { "epoch": 2.459464072367298, "grad_norm": 0.2759521470733944, "learning_rate": 1.0002529084471422e-05, "loss": 0.2148, "num_tokens": 1374719969.0, "step": 7206 }, { "epoch": 2.459805427547363, "grad_norm": 0.2484354719182421, "learning_rate": 9.996206373292869e-06, "loss": 0.237, "num_tokens": 1374916254.0, "step": 7207 }, { "epoch": 2.4601467827274277, "grad_norm": 0.22204711257636273, "learning_rate": 9.989883662114314e-06, "loss": 0.2142, "num_tokens": 1375089848.0, "step": 7208 }, { "epoch": 2.4604881379074928, "grad_norm": 0.2719544961839939, "learning_rate": 9.983560950935763e-06, "loss": 0.2291, "num_tokens": 1375228049.0, "step": 7209 }, { "epoch": 2.460829493087558, "grad_norm": 0.22498944966642448, "learning_rate": 9.977238239757208e-06, "loss": 0.2174, "num_tokens": 1375424066.0, "step": 7210 }, { "epoch": 2.4611708482676224, "grad_norm": 0.24424415788359496, "learning_rate": 9.970915528578655e-06, "loss": 0.2344, "num_tokens": 1375624913.0, "step": 7211 }, { "epoch": 2.4615122034476875, "grad_norm": 0.2276119477022166, "learning_rate": 9.964592817400102e-06, "loss": 0.2188, "num_tokens": 1375818266.0, "step": 7212 }, { "epoch": 2.461853558627752, "grad_norm": 0.2155679412944592, "learning_rate": 9.958270106221549e-06, "loss": 0.2411, "num_tokens": 1376038175.0, "step": 7213 }, { "epoch": 2.462194913807817, "grad_norm": 0.23186163350167158, "learning_rate": 9.951947395042994e-06, "loss": 0.2218, "num_tokens": 1376209194.0, "step": 7214 }, { "epoch": 2.462536268987882, "grad_norm": 0.2383746476430165, "learning_rate": 9.945624683864443e-06, "loss": 0.2259, "num_tokens": 1376410340.0, "step": 7215 }, { "epoch": 2.462877624167947, "grad_norm": 0.25513225665833844, "learning_rate": 9.939301972685888e-06, "loss": 0.2046, "num_tokens": 1376561998.0, "step": 7216 }, { "epoch": 2.4632189793480115, "grad_norm": 0.2304654505154152, "learning_rate": 9.932979261507335e-06, "loss": 0.2071, "num_tokens": 1376725582.0, "step": 7217 }, { "epoch": 2.4635603345280765, "grad_norm": 0.24760694466226577, "learning_rate": 9.926656550328782e-06, "loss": 0.2315, "num_tokens": 1376900201.0, "step": 7218 }, { "epoch": 2.463901689708141, "grad_norm": 0.21463343724322848, "learning_rate": 9.920333839150227e-06, "loss": 0.2414, "num_tokens": 1377151450.0, "step": 7219 }, { "epoch": 2.4642430448882062, "grad_norm": 0.23737829079530098, "learning_rate": 9.914011127971674e-06, "loss": 0.2238, "num_tokens": 1377330399.0, "step": 7220 }, { "epoch": 2.464584400068271, "grad_norm": 0.2602691762567852, "learning_rate": 9.907688416793121e-06, "loss": 0.2183, "num_tokens": 1377513641.0, "step": 7221 }, { "epoch": 2.464925755248336, "grad_norm": 0.2326016998549224, "learning_rate": 9.901365705614568e-06, "loss": 0.2187, "num_tokens": 1377686303.0, "step": 7222 }, { "epoch": 2.4652671104284005, "grad_norm": 0.2350728260288083, "learning_rate": 9.895042994436014e-06, "loss": 0.2335, "num_tokens": 1377878497.0, "step": 7223 }, { "epoch": 2.4656084656084656, "grad_norm": 0.22292980722439068, "learning_rate": 9.888720283257462e-06, "loss": 0.2223, "num_tokens": 1378088152.0, "step": 7224 }, { "epoch": 2.4659498207885306, "grad_norm": 0.22159610444333624, "learning_rate": 9.882397572078908e-06, "loss": 0.2101, "num_tokens": 1378281977.0, "step": 7225 }, { "epoch": 2.4662911759685953, "grad_norm": 0.21263473030992708, "learning_rate": 9.876074860900355e-06, "loss": 0.2122, "num_tokens": 1378490274.0, "step": 7226 }, { "epoch": 2.4666325311486603, "grad_norm": 0.25097296532625973, "learning_rate": 9.869752149721802e-06, "loss": 0.2181, "num_tokens": 1378670191.0, "step": 7227 }, { "epoch": 2.466973886328725, "grad_norm": 0.24164574439855205, "learning_rate": 9.863429438543249e-06, "loss": 0.2131, "num_tokens": 1378848245.0, "step": 7228 }, { "epoch": 2.46731524150879, "grad_norm": 0.23129614648527425, "learning_rate": 9.857106727364694e-06, "loss": 0.2581, "num_tokens": 1379062287.0, "step": 7229 }, { "epoch": 2.4676565966888546, "grad_norm": 0.22674856126979143, "learning_rate": 9.850784016186143e-06, "loss": 0.248, "num_tokens": 1379264599.0, "step": 7230 }, { "epoch": 2.4679979518689197, "grad_norm": 0.2270351655366929, "learning_rate": 9.844461305007588e-06, "loss": 0.2403, "num_tokens": 1379468004.0, "step": 7231 }, { "epoch": 2.4683393070489843, "grad_norm": 0.24352815462459157, "learning_rate": 9.838138593829035e-06, "loss": 0.2152, "num_tokens": 1379632372.0, "step": 7232 }, { "epoch": 2.4686806622290494, "grad_norm": 0.24682474150969874, "learning_rate": 9.831815882650482e-06, "loss": 0.2386, "num_tokens": 1379809271.0, "step": 7233 }, { "epoch": 2.469022017409114, "grad_norm": 0.24778873084467903, "learning_rate": 9.825493171471927e-06, "loss": 0.2317, "num_tokens": 1379992568.0, "step": 7234 }, { "epoch": 2.469363372589179, "grad_norm": 0.2596963894292348, "learning_rate": 9.819170460293374e-06, "loss": 0.259, "num_tokens": 1380194626.0, "step": 7235 }, { "epoch": 2.469704727769244, "grad_norm": 0.23002074678068954, "learning_rate": 9.812847749114821e-06, "loss": 0.2451, "num_tokens": 1380406339.0, "step": 7236 }, { "epoch": 2.4700460829493087, "grad_norm": 0.2475136072231504, "learning_rate": 9.806525037936268e-06, "loss": 0.2357, "num_tokens": 1380563480.0, "step": 7237 }, { "epoch": 2.470387438129374, "grad_norm": 0.2380651677690564, "learning_rate": 9.800202326757713e-06, "loss": 0.2211, "num_tokens": 1380747415.0, "step": 7238 }, { "epoch": 2.4707287933094384, "grad_norm": 0.250034759892883, "learning_rate": 9.793879615579162e-06, "loss": 0.2106, "num_tokens": 1380916336.0, "step": 7239 }, { "epoch": 2.4710701484895035, "grad_norm": 0.22451706081244832, "learning_rate": 9.787556904400607e-06, "loss": 0.2102, "num_tokens": 1381082616.0, "step": 7240 }, { "epoch": 2.471411503669568, "grad_norm": 0.24450958652531155, "learning_rate": 9.781234193222054e-06, "loss": 0.2149, "num_tokens": 1381249521.0, "step": 7241 }, { "epoch": 2.471752858849633, "grad_norm": 0.26186056589901335, "learning_rate": 9.7749114820435e-06, "loss": 0.2292, "num_tokens": 1381414149.0, "step": 7242 }, { "epoch": 2.4720942140296978, "grad_norm": 0.2393447308625067, "learning_rate": 9.768588770864948e-06, "loss": 0.2308, "num_tokens": 1381633148.0, "step": 7243 }, { "epoch": 2.472435569209763, "grad_norm": 0.2239650398586159, "learning_rate": 9.762266059686393e-06, "loss": 0.245, "num_tokens": 1381864128.0, "step": 7244 }, { "epoch": 2.4727769243898274, "grad_norm": 0.21968274801503182, "learning_rate": 9.75594334850784e-06, "loss": 0.2245, "num_tokens": 1382068627.0, "step": 7245 }, { "epoch": 2.4731182795698925, "grad_norm": 0.21939203397635187, "learning_rate": 9.749620637329287e-06, "loss": 0.2167, "num_tokens": 1382259781.0, "step": 7246 }, { "epoch": 2.4734596347499576, "grad_norm": 0.23143314289119823, "learning_rate": 9.743297926150734e-06, "loss": 0.2289, "num_tokens": 1382463261.0, "step": 7247 }, { "epoch": 2.473800989930022, "grad_norm": 0.24311322364315777, "learning_rate": 9.73697521497218e-06, "loss": 0.2281, "num_tokens": 1382643978.0, "step": 7248 }, { "epoch": 2.4741423451100872, "grad_norm": 0.2217192720265227, "learning_rate": 9.730652503793627e-06, "loss": 0.2108, "num_tokens": 1382825445.0, "step": 7249 }, { "epoch": 2.474483700290152, "grad_norm": 0.2302841884814935, "learning_rate": 9.724329792615074e-06, "loss": 0.2269, "num_tokens": 1383034394.0, "step": 7250 }, { "epoch": 2.474825055470217, "grad_norm": 0.24114536567303538, "learning_rate": 9.71800708143652e-06, "loss": 0.2221, "num_tokens": 1383206606.0, "step": 7251 }, { "epoch": 2.4751664106502815, "grad_norm": 0.21472096305831476, "learning_rate": 9.711684370257968e-06, "loss": 0.2299, "num_tokens": 1383421378.0, "step": 7252 }, { "epoch": 2.4755077658303466, "grad_norm": 0.23558711604396795, "learning_rate": 9.705361659079413e-06, "loss": 0.2272, "num_tokens": 1383607894.0, "step": 7253 }, { "epoch": 2.475849121010411, "grad_norm": 0.7622692658832585, "learning_rate": 9.69903894790086e-06, "loss": 0.2313, "num_tokens": 1383804555.0, "step": 7254 }, { "epoch": 2.4761904761904763, "grad_norm": 0.2308642018185701, "learning_rate": 9.692716236722307e-06, "loss": 0.2572, "num_tokens": 1384006945.0, "step": 7255 }, { "epoch": 2.476531831370541, "grad_norm": 0.25396757687671934, "learning_rate": 9.686393525543754e-06, "loss": 0.2185, "num_tokens": 1384185032.0, "step": 7256 }, { "epoch": 2.476873186550606, "grad_norm": 0.24446687795328165, "learning_rate": 9.6800708143652e-06, "loss": 0.2386, "num_tokens": 1384394482.0, "step": 7257 }, { "epoch": 2.4772145417306706, "grad_norm": 0.226520359187937, "learning_rate": 9.673748103186648e-06, "loss": 0.2118, "num_tokens": 1384567138.0, "step": 7258 }, { "epoch": 2.4775558969107356, "grad_norm": 0.2580116454583367, "learning_rate": 9.667425392008093e-06, "loss": 0.2152, "num_tokens": 1384719143.0, "step": 7259 }, { "epoch": 2.4778972520908003, "grad_norm": 0.27714539947800304, "learning_rate": 9.66110268082954e-06, "loss": 0.236, "num_tokens": 1384899352.0, "step": 7260 }, { "epoch": 2.4782386072708653, "grad_norm": 0.24476180416839882, "learning_rate": 9.654779969650987e-06, "loss": 0.2323, "num_tokens": 1385125795.0, "step": 7261 }, { "epoch": 2.4785799624509304, "grad_norm": 0.256950225120096, "learning_rate": 9.648457258472434e-06, "loss": 0.233, "num_tokens": 1385304230.0, "step": 7262 }, { "epoch": 2.478921317630995, "grad_norm": 0.24945331964841289, "learning_rate": 9.64213454729388e-06, "loss": 0.2453, "num_tokens": 1385498130.0, "step": 7263 }, { "epoch": 2.47926267281106, "grad_norm": 0.25243625412481846, "learning_rate": 9.635811836115326e-06, "loss": 0.2284, "num_tokens": 1385688723.0, "step": 7264 }, { "epoch": 2.4796040279911247, "grad_norm": 0.21429558994523895, "learning_rate": 9.629489124936773e-06, "loss": 0.2285, "num_tokens": 1385897427.0, "step": 7265 }, { "epoch": 2.4799453831711897, "grad_norm": 0.2445949373864535, "learning_rate": 9.623166413758219e-06, "loss": 0.2177, "num_tokens": 1386095433.0, "step": 7266 }, { "epoch": 2.4802867383512543, "grad_norm": 0.23735701909964654, "learning_rate": 9.616843702579667e-06, "loss": 0.2121, "num_tokens": 1386296295.0, "step": 7267 }, { "epoch": 2.4806280935313194, "grad_norm": 0.25738646338402554, "learning_rate": 9.610520991401113e-06, "loss": 0.209, "num_tokens": 1386466834.0, "step": 7268 }, { "epoch": 2.480969448711384, "grad_norm": 0.2580508857428152, "learning_rate": 9.60419828022256e-06, "loss": 0.2123, "num_tokens": 1386618785.0, "step": 7269 }, { "epoch": 2.481310803891449, "grad_norm": 0.49066136395876153, "learning_rate": 9.597875569044007e-06, "loss": 0.2429, "num_tokens": 1386844556.0, "step": 7270 }, { "epoch": 2.4816521590715137, "grad_norm": 0.23291930427638866, "learning_rate": 9.591552857865454e-06, "loss": 0.2298, "num_tokens": 1387049943.0, "step": 7271 }, { "epoch": 2.4819935142515788, "grad_norm": 0.2426426684418762, "learning_rate": 9.585230146686899e-06, "loss": 0.2214, "num_tokens": 1387227237.0, "step": 7272 }, { "epoch": 2.482334869431644, "grad_norm": 0.2419820771233134, "learning_rate": 9.578907435508348e-06, "loss": 0.2157, "num_tokens": 1387405629.0, "step": 7273 }, { "epoch": 2.4826762246117084, "grad_norm": 0.22636381976465672, "learning_rate": 9.572584724329793e-06, "loss": 0.2299, "num_tokens": 1387644091.0, "step": 7274 }, { "epoch": 2.4830175797917735, "grad_norm": 0.24869016824946125, "learning_rate": 9.56626201315124e-06, "loss": 0.2125, "num_tokens": 1387810563.0, "step": 7275 }, { "epoch": 2.483358934971838, "grad_norm": 0.23919452926532725, "learning_rate": 9.559939301972687e-06, "loss": 0.2143, "num_tokens": 1387977068.0, "step": 7276 }, { "epoch": 2.483700290151903, "grad_norm": 0.2781867295041334, "learning_rate": 9.553616590794134e-06, "loss": 0.2594, "num_tokens": 1388181917.0, "step": 7277 }, { "epoch": 2.484041645331968, "grad_norm": 0.2208194149118119, "learning_rate": 9.547293879615579e-06, "loss": 0.2587, "num_tokens": 1388414766.0, "step": 7278 }, { "epoch": 2.484383000512033, "grad_norm": 0.2605483622735345, "learning_rate": 9.540971168437026e-06, "loss": 0.2266, "num_tokens": 1388593296.0, "step": 7279 }, { "epoch": 2.4847243556920975, "grad_norm": 0.2518208387536771, "learning_rate": 9.534648457258473e-06, "loss": 0.2387, "num_tokens": 1388779446.0, "step": 7280 }, { "epoch": 2.4850657108721625, "grad_norm": 0.24363263219865103, "learning_rate": 9.528325746079918e-06, "loss": 0.2289, "num_tokens": 1388961060.0, "step": 7281 }, { "epoch": 2.485407066052227, "grad_norm": 0.22747488020012324, "learning_rate": 9.522003034901367e-06, "loss": 0.2076, "num_tokens": 1389146230.0, "step": 7282 }, { "epoch": 2.4857484212322922, "grad_norm": 0.2609847752704279, "learning_rate": 9.515680323722812e-06, "loss": 0.2188, "num_tokens": 1389286278.0, "step": 7283 }, { "epoch": 2.4860897764123573, "grad_norm": 0.24424014442349845, "learning_rate": 9.50935761254426e-06, "loss": 0.2236, "num_tokens": 1389474503.0, "step": 7284 }, { "epoch": 2.486431131592422, "grad_norm": 0.26056425092048113, "learning_rate": 9.503034901365706e-06, "loss": 0.2521, "num_tokens": 1389662562.0, "step": 7285 }, { "epoch": 2.4867724867724865, "grad_norm": 0.21494709647927773, "learning_rate": 9.496712190187153e-06, "loss": 0.2014, "num_tokens": 1389842724.0, "step": 7286 }, { "epoch": 2.4871138419525516, "grad_norm": 0.2259312099274558, "learning_rate": 9.490389479008599e-06, "loss": 0.2003, "num_tokens": 1390022179.0, "step": 7287 }, { "epoch": 2.4874551971326166, "grad_norm": 0.22690574915745923, "learning_rate": 9.484066767830047e-06, "loss": 0.2179, "num_tokens": 1390216267.0, "step": 7288 }, { "epoch": 2.4877965523126813, "grad_norm": 0.3801631530802404, "learning_rate": 9.477744056651492e-06, "loss": 0.2591, "num_tokens": 1390432467.0, "step": 7289 }, { "epoch": 2.4881379074927463, "grad_norm": 0.21632245571124675, "learning_rate": 9.47142134547294e-06, "loss": 0.2358, "num_tokens": 1390648789.0, "step": 7290 }, { "epoch": 2.488479262672811, "grad_norm": 0.26359133782971195, "learning_rate": 9.465098634294386e-06, "loss": 0.2436, "num_tokens": 1390805161.0, "step": 7291 }, { "epoch": 2.488820617852876, "grad_norm": 0.2595929744948399, "learning_rate": 9.458775923115833e-06, "loss": 0.2327, "num_tokens": 1390976231.0, "step": 7292 }, { "epoch": 2.4891619730329406, "grad_norm": 0.23710261416620082, "learning_rate": 9.452453211937279e-06, "loss": 0.1912, "num_tokens": 1391134944.0, "step": 7293 }, { "epoch": 2.4895033282130057, "grad_norm": 0.239496649948441, "learning_rate": 9.446130500758726e-06, "loss": 0.2171, "num_tokens": 1391314047.0, "step": 7294 }, { "epoch": 2.4898446833930703, "grad_norm": 0.227588640626819, "learning_rate": 9.439807789580173e-06, "loss": 0.2282, "num_tokens": 1391506338.0, "step": 7295 }, { "epoch": 2.4901860385731354, "grad_norm": 0.22379130398698446, "learning_rate": 9.433485078401618e-06, "loss": 0.232, "num_tokens": 1391713524.0, "step": 7296 }, { "epoch": 2.4905273937532, "grad_norm": 0.2676486857723231, "learning_rate": 9.427162367223067e-06, "loss": 0.2226, "num_tokens": 1391852726.0, "step": 7297 }, { "epoch": 2.490868748933265, "grad_norm": 0.24019930091516084, "learning_rate": 9.420839656044512e-06, "loss": 0.193, "num_tokens": 1392008962.0, "step": 7298 }, { "epoch": 2.49121010411333, "grad_norm": 0.21088924879430193, "learning_rate": 9.414516944865959e-06, "loss": 0.2185, "num_tokens": 1392218275.0, "step": 7299 }, { "epoch": 2.4915514592933947, "grad_norm": 0.24557715559253168, "learning_rate": 9.408194233687406e-06, "loss": 0.2311, "num_tokens": 1392398805.0, "step": 7300 }, { "epoch": 2.49189281447346, "grad_norm": 0.22588286607885044, "learning_rate": 9.401871522508853e-06, "loss": 0.2417, "num_tokens": 1392616557.0, "step": 7301 }, { "epoch": 2.4922341696535244, "grad_norm": 0.2344611498591408, "learning_rate": 9.395548811330298e-06, "loss": 0.2143, "num_tokens": 1392821144.0, "step": 7302 }, { "epoch": 2.4925755248335895, "grad_norm": 0.26078415441530806, "learning_rate": 9.389226100151747e-06, "loss": 0.2213, "num_tokens": 1393006631.0, "step": 7303 }, { "epoch": 2.492916880013654, "grad_norm": 0.2264339927013657, "learning_rate": 9.382903388973192e-06, "loss": 0.2156, "num_tokens": 1393203900.0, "step": 7304 }, { "epoch": 2.493258235193719, "grad_norm": 0.22999556467759755, "learning_rate": 9.376580677794639e-06, "loss": 0.2146, "num_tokens": 1393411761.0, "step": 7305 }, { "epoch": 2.4935995903737838, "grad_norm": 0.21566583889459257, "learning_rate": 9.370257966616084e-06, "loss": 0.2522, "num_tokens": 1393635245.0, "step": 7306 }, { "epoch": 2.493940945553849, "grad_norm": 0.23151922380030754, "learning_rate": 9.363935255437533e-06, "loss": 0.2112, "num_tokens": 1393825539.0, "step": 7307 }, { "epoch": 2.4942823007339134, "grad_norm": 0.2383088337109872, "learning_rate": 9.357612544258978e-06, "loss": 0.2104, "num_tokens": 1393979861.0, "step": 7308 }, { "epoch": 2.4946236559139785, "grad_norm": 0.22876098872978248, "learning_rate": 9.351289833080425e-06, "loss": 0.242, "num_tokens": 1394171005.0, "step": 7309 }, { "epoch": 2.4949650110940436, "grad_norm": 0.2607677316392694, "learning_rate": 9.344967121901872e-06, "loss": 0.2264, "num_tokens": 1394371936.0, "step": 7310 }, { "epoch": 2.495306366274108, "grad_norm": 0.25803607632821135, "learning_rate": 9.338644410723318e-06, "loss": 0.2292, "num_tokens": 1394535116.0, "step": 7311 }, { "epoch": 2.4956477214541732, "grad_norm": 0.2272311383068904, "learning_rate": 9.332321699544765e-06, "loss": 0.2437, "num_tokens": 1394727399.0, "step": 7312 }, { "epoch": 2.495989076634238, "grad_norm": 0.2421049427349723, "learning_rate": 9.325998988366212e-06, "loss": 0.2402, "num_tokens": 1394922281.0, "step": 7313 }, { "epoch": 2.496330431814303, "grad_norm": 0.2468480224103774, "learning_rate": 9.319676277187659e-06, "loss": 0.2044, "num_tokens": 1395123877.0, "step": 7314 }, { "epoch": 2.4966717869943675, "grad_norm": 0.22394456104479735, "learning_rate": 9.313353566009104e-06, "loss": 0.2201, "num_tokens": 1395340625.0, "step": 7315 }, { "epoch": 2.4970131421744326, "grad_norm": 0.2510646188250626, "learning_rate": 9.307030854830553e-06, "loss": 0.2281, "num_tokens": 1395523942.0, "step": 7316 }, { "epoch": 2.497354497354497, "grad_norm": 0.24183427602748514, "learning_rate": 9.300708143651998e-06, "loss": 0.2179, "num_tokens": 1395707135.0, "step": 7317 }, { "epoch": 2.4976958525345623, "grad_norm": 0.22680110231119346, "learning_rate": 9.294385432473445e-06, "loss": 0.2126, "num_tokens": 1395892185.0, "step": 7318 }, { "epoch": 2.498037207714627, "grad_norm": 0.22904791832506774, "learning_rate": 9.288062721294892e-06, "loss": 0.205, "num_tokens": 1396075539.0, "step": 7319 }, { "epoch": 2.498378562894692, "grad_norm": 0.22998557329249722, "learning_rate": 9.281740010116339e-06, "loss": 0.2246, "num_tokens": 1396255737.0, "step": 7320 }, { "epoch": 2.498719918074757, "grad_norm": 0.2207469462770775, "learning_rate": 9.275417298937784e-06, "loss": 0.2521, "num_tokens": 1396483591.0, "step": 7321 }, { "epoch": 2.4990612732548216, "grad_norm": 0.2558986547628265, "learning_rate": 9.269094587759233e-06, "loss": 0.2261, "num_tokens": 1396673087.0, "step": 7322 }, { "epoch": 2.4994026284348863, "grad_norm": 0.23434120216743062, "learning_rate": 9.262771876580678e-06, "loss": 0.2301, "num_tokens": 1396861137.0, "step": 7323 }, { "epoch": 2.4997439836149513, "grad_norm": 0.2198351980995842, "learning_rate": 9.256449165402125e-06, "loss": 0.2452, "num_tokens": 1397127606.0, "step": 7324 }, { "epoch": 2.5000853387950164, "grad_norm": 0.2392605704108987, "learning_rate": 9.250126454223572e-06, "loss": 0.2093, "num_tokens": 1397312262.0, "step": 7325 }, { "epoch": 2.500426693975081, "grad_norm": 0.27279538208812104, "learning_rate": 9.243803743045017e-06, "loss": 0.2302, "num_tokens": 1397471524.0, "step": 7326 }, { "epoch": 2.500768049155146, "grad_norm": 0.253240741838854, "learning_rate": 9.237481031866464e-06, "loss": 0.2401, "num_tokens": 1397668684.0, "step": 7327 }, { "epoch": 2.5011094043352107, "grad_norm": 0.24081952511371313, "learning_rate": 9.231158320687911e-06, "loss": 0.2211, "num_tokens": 1397845283.0, "step": 7328 }, { "epoch": 2.5014507595152757, "grad_norm": 0.24660878218578744, "learning_rate": 9.224835609509358e-06, "loss": 0.2159, "num_tokens": 1398008129.0, "step": 7329 }, { "epoch": 2.5017921146953404, "grad_norm": 0.22303889382805364, "learning_rate": 9.218512898330804e-06, "loss": 0.2215, "num_tokens": 1398193753.0, "step": 7330 }, { "epoch": 2.5021334698754054, "grad_norm": 0.2318345389676767, "learning_rate": 9.212190187152252e-06, "loss": 0.2144, "num_tokens": 1398393237.0, "step": 7331 }, { "epoch": 2.5024748250554705, "grad_norm": 0.23730808896643232, "learning_rate": 9.205867475973698e-06, "loss": 0.2432, "num_tokens": 1398584725.0, "step": 7332 }, { "epoch": 2.502816180235535, "grad_norm": 0.2671170204207401, "learning_rate": 9.199544764795144e-06, "loss": 0.2413, "num_tokens": 1398767613.0, "step": 7333 }, { "epoch": 2.5031575354155997, "grad_norm": 0.2636726789116616, "learning_rate": 9.193222053616591e-06, "loss": 0.2331, "num_tokens": 1398950361.0, "step": 7334 }, { "epoch": 2.5034988905956648, "grad_norm": 0.28028935784613074, "learning_rate": 9.186899342438038e-06, "loss": 0.2205, "num_tokens": 1399107870.0, "step": 7335 }, { "epoch": 2.50384024577573, "grad_norm": 0.2627856832266262, "learning_rate": 9.180576631259484e-06, "loss": 0.2047, "num_tokens": 1399265724.0, "step": 7336 }, { "epoch": 2.5041816009557945, "grad_norm": 0.23505616342247299, "learning_rate": 9.174253920080932e-06, "loss": 0.2456, "num_tokens": 1399467187.0, "step": 7337 }, { "epoch": 2.5045229561358595, "grad_norm": 0.22830338653094923, "learning_rate": 9.167931208902378e-06, "loss": 0.2307, "num_tokens": 1399671558.0, "step": 7338 }, { "epoch": 2.504864311315924, "grad_norm": 0.24123172568995072, "learning_rate": 9.161608497723825e-06, "loss": 0.2094, "num_tokens": 1399841980.0, "step": 7339 }, { "epoch": 2.505205666495989, "grad_norm": 0.2312204411066297, "learning_rate": 9.155285786545272e-06, "loss": 0.231, "num_tokens": 1400060128.0, "step": 7340 }, { "epoch": 2.505547021676054, "grad_norm": 0.23625118915401988, "learning_rate": 9.148963075366717e-06, "loss": 0.2338, "num_tokens": 1400284911.0, "step": 7341 }, { "epoch": 2.505888376856119, "grad_norm": 0.26139164070179566, "learning_rate": 9.142640364188164e-06, "loss": 0.2223, "num_tokens": 1400478288.0, "step": 7342 }, { "epoch": 2.5062297320361835, "grad_norm": 0.2406245621126387, "learning_rate": 9.136317653009611e-06, "loss": 0.2072, "num_tokens": 1400649623.0, "step": 7343 }, { "epoch": 2.5065710872162486, "grad_norm": 0.24531876542528686, "learning_rate": 9.129994941831058e-06, "loss": 0.2389, "num_tokens": 1400858428.0, "step": 7344 }, { "epoch": 2.506912442396313, "grad_norm": 0.24133128265237094, "learning_rate": 9.123672230652503e-06, "loss": 0.2166, "num_tokens": 1401029252.0, "step": 7345 }, { "epoch": 2.5072537975763782, "grad_norm": 0.25315918697434103, "learning_rate": 9.117349519473952e-06, "loss": 0.2157, "num_tokens": 1401203335.0, "step": 7346 }, { "epoch": 2.5075951527564433, "grad_norm": 0.22232776685966568, "learning_rate": 9.111026808295397e-06, "loss": 0.2274, "num_tokens": 1401421813.0, "step": 7347 }, { "epoch": 2.507936507936508, "grad_norm": 0.26216846489139456, "learning_rate": 9.104704097116844e-06, "loss": 0.2405, "num_tokens": 1401588462.0, "step": 7348 }, { "epoch": 2.5082778631165725, "grad_norm": 0.23996219976177133, "learning_rate": 9.098381385938291e-06, "loss": 0.2291, "num_tokens": 1401770067.0, "step": 7349 }, { "epoch": 2.5086192182966376, "grad_norm": 0.2330528277608754, "learning_rate": 9.092058674759738e-06, "loss": 0.2296, "num_tokens": 1401976604.0, "step": 7350 }, { "epoch": 2.5089605734767026, "grad_norm": 0.22794721586856428, "learning_rate": 9.085735963581183e-06, "loss": 0.2255, "num_tokens": 1402184478.0, "step": 7351 }, { "epoch": 2.5093019286567673, "grad_norm": 0.26508736653416415, "learning_rate": 9.079413252402632e-06, "loss": 0.2354, "num_tokens": 1402358705.0, "step": 7352 }, { "epoch": 2.5096432838368323, "grad_norm": 0.2431242569337372, "learning_rate": 9.073090541224077e-06, "loss": 0.2382, "num_tokens": 1402570132.0, "step": 7353 }, { "epoch": 2.509984639016897, "grad_norm": 0.24317068547211976, "learning_rate": 9.066767830045524e-06, "loss": 0.2382, "num_tokens": 1402760194.0, "step": 7354 }, { "epoch": 2.510325994196962, "grad_norm": 0.2485754223689809, "learning_rate": 9.060445118866971e-06, "loss": 0.2221, "num_tokens": 1402935091.0, "step": 7355 }, { "epoch": 2.5106673493770266, "grad_norm": 0.22990986373220743, "learning_rate": 9.054122407688417e-06, "loss": 0.2257, "num_tokens": 1403143392.0, "step": 7356 }, { "epoch": 2.5110087045570917, "grad_norm": 0.2439712890455195, "learning_rate": 9.047799696509864e-06, "loss": 0.2476, "num_tokens": 1403359042.0, "step": 7357 }, { "epoch": 2.5113500597371567, "grad_norm": 0.24214607034931285, "learning_rate": 9.04147698533131e-06, "loss": 0.2405, "num_tokens": 1403543289.0, "step": 7358 }, { "epoch": 2.5116914149172214, "grad_norm": 0.21597503122451728, "learning_rate": 9.035154274152758e-06, "loss": 0.2117, "num_tokens": 1403744529.0, "step": 7359 }, { "epoch": 2.512032770097286, "grad_norm": 0.22452997283878423, "learning_rate": 9.028831562974203e-06, "loss": 0.2225, "num_tokens": 1403930741.0, "step": 7360 }, { "epoch": 2.512374125277351, "grad_norm": 0.22991123333909866, "learning_rate": 9.022508851795652e-06, "loss": 0.2379, "num_tokens": 1404128438.0, "step": 7361 }, { "epoch": 2.512715480457416, "grad_norm": 0.24507032912993115, "learning_rate": 9.016186140617097e-06, "loss": 0.2488, "num_tokens": 1404324336.0, "step": 7362 }, { "epoch": 2.5130568356374807, "grad_norm": 0.2742964166388873, "learning_rate": 9.009863429438544e-06, "loss": 0.2133, "num_tokens": 1404475115.0, "step": 7363 }, { "epoch": 2.513398190817546, "grad_norm": 0.24540601445195248, "learning_rate": 9.00354071825999e-06, "loss": 0.2358, "num_tokens": 1404658986.0, "step": 7364 }, { "epoch": 2.5137395459976104, "grad_norm": 0.2613104860508495, "learning_rate": 8.997218007081438e-06, "loss": 0.2188, "num_tokens": 1404830755.0, "step": 7365 }, { "epoch": 2.5140809011776755, "grad_norm": 0.22147723911501666, "learning_rate": 8.990895295902883e-06, "loss": 0.2037, "num_tokens": 1405021319.0, "step": 7366 }, { "epoch": 2.51442225635774, "grad_norm": 0.2248500667058657, "learning_rate": 8.98457258472433e-06, "loss": 0.2222, "num_tokens": 1405238023.0, "step": 7367 }, { "epoch": 2.514763611537805, "grad_norm": 0.25998871861423273, "learning_rate": 8.978249873545777e-06, "loss": 0.1981, "num_tokens": 1405383466.0, "step": 7368 }, { "epoch": 2.51510496671787, "grad_norm": 0.2530907513498492, "learning_rate": 8.971927162367224e-06, "loss": 0.1947, "num_tokens": 1405540216.0, "step": 7369 }, { "epoch": 2.515446321897935, "grad_norm": 0.27082023886020246, "learning_rate": 8.965604451188671e-06, "loss": 0.2438, "num_tokens": 1405730744.0, "step": 7370 }, { "epoch": 2.5157876770779994, "grad_norm": 0.2672399539339658, "learning_rate": 8.959281740010116e-06, "loss": 0.2094, "num_tokens": 1405862916.0, "step": 7371 }, { "epoch": 2.5161290322580645, "grad_norm": 0.2275326759411684, "learning_rate": 8.952959028831563e-06, "loss": 0.1982, "num_tokens": 1406044302.0, "step": 7372 }, { "epoch": 2.5164703874381296, "grad_norm": 0.23030489046382785, "learning_rate": 8.946636317653009e-06, "loss": 0.2093, "num_tokens": 1406233726.0, "step": 7373 }, { "epoch": 2.516811742618194, "grad_norm": 0.24141569969373847, "learning_rate": 8.940313606474457e-06, "loss": 0.2231, "num_tokens": 1406422149.0, "step": 7374 }, { "epoch": 2.5171530977982592, "grad_norm": 0.23115629452988612, "learning_rate": 8.933990895295903e-06, "loss": 0.2247, "num_tokens": 1406622142.0, "step": 7375 }, { "epoch": 2.517494452978324, "grad_norm": 0.21889970110773116, "learning_rate": 8.92766818411735e-06, "loss": 0.2396, "num_tokens": 1406846926.0, "step": 7376 }, { "epoch": 2.517835808158389, "grad_norm": 0.2215304752279889, "learning_rate": 8.921345472938796e-06, "loss": 0.2321, "num_tokens": 1407055174.0, "step": 7377 }, { "epoch": 2.5181771633384535, "grad_norm": 0.22179543140021524, "learning_rate": 8.915022761760243e-06, "loss": 0.2345, "num_tokens": 1407253414.0, "step": 7378 }, { "epoch": 2.5185185185185186, "grad_norm": 0.23622805124492954, "learning_rate": 8.908700050581689e-06, "loss": 0.2298, "num_tokens": 1407439727.0, "step": 7379 }, { "epoch": 2.518859873698583, "grad_norm": 0.2194401566321806, "learning_rate": 8.902377339403137e-06, "loss": 0.2283, "num_tokens": 1407663006.0, "step": 7380 }, { "epoch": 2.5192012288786483, "grad_norm": 0.2228958569556682, "learning_rate": 8.896054628224583e-06, "loss": 0.2156, "num_tokens": 1407855543.0, "step": 7381 }, { "epoch": 2.519542584058713, "grad_norm": 0.24672279016301216, "learning_rate": 8.88973191704603e-06, "loss": 0.2036, "num_tokens": 1408013563.0, "step": 7382 }, { "epoch": 2.519883939238778, "grad_norm": 0.23279105077699966, "learning_rate": 8.883409205867477e-06, "loss": 0.2321, "num_tokens": 1408244482.0, "step": 7383 }, { "epoch": 2.520225294418843, "grad_norm": 0.2311664918783533, "learning_rate": 8.877086494688924e-06, "loss": 0.2616, "num_tokens": 1408449540.0, "step": 7384 }, { "epoch": 2.5205666495989076, "grad_norm": 0.24649685259579884, "learning_rate": 8.870763783510369e-06, "loss": 0.2199, "num_tokens": 1408632277.0, "step": 7385 }, { "epoch": 2.5209080047789723, "grad_norm": 0.26319914320239407, "learning_rate": 8.864441072331816e-06, "loss": 0.2148, "num_tokens": 1408807010.0, "step": 7386 }, { "epoch": 2.5212493599590373, "grad_norm": 0.2388831297562773, "learning_rate": 8.858118361153263e-06, "loss": 0.2353, "num_tokens": 1408994321.0, "step": 7387 }, { "epoch": 2.5215907151391024, "grad_norm": 0.2672194631238594, "learning_rate": 8.851795649974708e-06, "loss": 0.2309, "num_tokens": 1409161730.0, "step": 7388 }, { "epoch": 2.521932070319167, "grad_norm": 0.22252863892158278, "learning_rate": 8.845472938796157e-06, "loss": 0.2089, "num_tokens": 1409354043.0, "step": 7389 }, { "epoch": 2.522273425499232, "grad_norm": 0.19080620857138017, "learning_rate": 8.839150227617602e-06, "loss": 0.2493, "num_tokens": 1409619074.0, "step": 7390 }, { "epoch": 2.5226147806792967, "grad_norm": 0.2286398002633094, "learning_rate": 8.83282751643905e-06, "loss": 0.2465, "num_tokens": 1409858441.0, "step": 7391 }, { "epoch": 2.5229561358593617, "grad_norm": 0.24924975213204578, "learning_rate": 8.826504805260496e-06, "loss": 0.2244, "num_tokens": 1410062470.0, "step": 7392 }, { "epoch": 2.5232974910394264, "grad_norm": 0.2582175324386911, "learning_rate": 8.820182094081943e-06, "loss": 0.2407, "num_tokens": 1410238396.0, "step": 7393 }, { "epoch": 2.5236388462194914, "grad_norm": 0.22973337894153506, "learning_rate": 8.813859382903388e-06, "loss": 0.2319, "num_tokens": 1410423060.0, "step": 7394 }, { "epoch": 2.5239802013995565, "grad_norm": 0.2540983483715953, "learning_rate": 8.807536671724837e-06, "loss": 0.2301, "num_tokens": 1410603773.0, "step": 7395 }, { "epoch": 2.524321556579621, "grad_norm": 0.25626230390614513, "learning_rate": 8.801213960546282e-06, "loss": 0.2512, "num_tokens": 1410786558.0, "step": 7396 }, { "epoch": 2.5246629117596857, "grad_norm": 0.24720157942989152, "learning_rate": 8.79489124936773e-06, "loss": 0.2426, "num_tokens": 1410949605.0, "step": 7397 }, { "epoch": 2.5250042669397508, "grad_norm": 0.24039114735145278, "learning_rate": 8.788568538189176e-06, "loss": 0.2378, "num_tokens": 1411115574.0, "step": 7398 }, { "epoch": 2.525345622119816, "grad_norm": 0.23597444978532883, "learning_rate": 8.782245827010623e-06, "loss": 0.2217, "num_tokens": 1411288132.0, "step": 7399 }, { "epoch": 2.5256869772998805, "grad_norm": 0.2360589765061117, "learning_rate": 8.775923115832069e-06, "loss": 0.2509, "num_tokens": 1411500844.0, "step": 7400 }, { "epoch": 2.5260283324799455, "grad_norm": 0.2333657711104494, "learning_rate": 8.769600404653516e-06, "loss": 0.233, "num_tokens": 1411695664.0, "step": 7401 }, { "epoch": 2.52636968766001, "grad_norm": 0.23688832872088497, "learning_rate": 8.763277693474963e-06, "loss": 0.2165, "num_tokens": 1411868749.0, "step": 7402 }, { "epoch": 2.526711042840075, "grad_norm": 0.2526225047399702, "learning_rate": 8.756954982296408e-06, "loss": 0.2215, "num_tokens": 1412035413.0, "step": 7403 }, { "epoch": 2.52705239802014, "grad_norm": 0.24600127601069063, "learning_rate": 8.750632271117857e-06, "loss": 0.249, "num_tokens": 1412242654.0, "step": 7404 }, { "epoch": 2.527393753200205, "grad_norm": 0.23758717421807055, "learning_rate": 8.744309559939302e-06, "loss": 0.217, "num_tokens": 1412445557.0, "step": 7405 }, { "epoch": 2.52773510838027, "grad_norm": 0.2286750653153672, "learning_rate": 8.737986848760749e-06, "loss": 0.2354, "num_tokens": 1412662092.0, "step": 7406 }, { "epoch": 2.5280764635603346, "grad_norm": 0.23403221058865242, "learning_rate": 8.731664137582196e-06, "loss": 0.2332, "num_tokens": 1412871394.0, "step": 7407 }, { "epoch": 2.528417818740399, "grad_norm": 0.3184889793965239, "learning_rate": 8.725341426403643e-06, "loss": 0.233, "num_tokens": 1413018000.0, "step": 7408 }, { "epoch": 2.5287591739204642, "grad_norm": 0.2325203661927441, "learning_rate": 8.719018715225088e-06, "loss": 0.2003, "num_tokens": 1413191623.0, "step": 7409 }, { "epoch": 2.5291005291005293, "grad_norm": 0.2552101706101555, "learning_rate": 8.712696004046537e-06, "loss": 0.2019, "num_tokens": 1413355937.0, "step": 7410 }, { "epoch": 2.529441884280594, "grad_norm": 0.2439986448074431, "learning_rate": 8.706373292867982e-06, "loss": 0.2038, "num_tokens": 1413524022.0, "step": 7411 }, { "epoch": 2.529783239460659, "grad_norm": 0.2159586833960118, "learning_rate": 8.700050581689429e-06, "loss": 0.2079, "num_tokens": 1413722842.0, "step": 7412 }, { "epoch": 2.5301245946407236, "grad_norm": 0.24815395290314554, "learning_rate": 8.693727870510876e-06, "loss": 0.2357, "num_tokens": 1413901692.0, "step": 7413 }, { "epoch": 2.5304659498207887, "grad_norm": 0.24576448480720464, "learning_rate": 8.687405159332323e-06, "loss": 0.2354, "num_tokens": 1414100827.0, "step": 7414 }, { "epoch": 2.5308073050008533, "grad_norm": 0.24488450504506387, "learning_rate": 8.681082448153768e-06, "loss": 0.2499, "num_tokens": 1414324145.0, "step": 7415 }, { "epoch": 2.5311486601809183, "grad_norm": 0.24155578811732437, "learning_rate": 8.674759736975215e-06, "loss": 0.2309, "num_tokens": 1414516067.0, "step": 7416 }, { "epoch": 2.531490015360983, "grad_norm": 0.27410404645014713, "learning_rate": 8.668437025796662e-06, "loss": 0.2392, "num_tokens": 1414662448.0, "step": 7417 }, { "epoch": 2.531831370541048, "grad_norm": 0.24166237976112062, "learning_rate": 8.662114314618108e-06, "loss": 0.2141, "num_tokens": 1414832973.0, "step": 7418 }, { "epoch": 2.5321727257211126, "grad_norm": 0.22857376590979167, "learning_rate": 8.655791603439556e-06, "loss": 0.2134, "num_tokens": 1415028641.0, "step": 7419 }, { "epoch": 2.5325140809011777, "grad_norm": 0.23073690642134825, "learning_rate": 8.649468892261002e-06, "loss": 0.2307, "num_tokens": 1415225091.0, "step": 7420 }, { "epoch": 2.5328554360812428, "grad_norm": 0.22778100726263156, "learning_rate": 8.643146181082449e-06, "loss": 0.2181, "num_tokens": 1415430125.0, "step": 7421 }, { "epoch": 2.5331967912613074, "grad_norm": 0.24362806811362045, "learning_rate": 8.636823469903895e-06, "loss": 0.2237, "num_tokens": 1415612310.0, "step": 7422 }, { "epoch": 2.533538146441372, "grad_norm": 0.23966115075506242, "learning_rate": 8.630500758725342e-06, "loss": 0.1998, "num_tokens": 1415787055.0, "step": 7423 }, { "epoch": 2.533879501621437, "grad_norm": 0.22318767347527974, "learning_rate": 8.624178047546788e-06, "loss": 0.2421, "num_tokens": 1415993970.0, "step": 7424 }, { "epoch": 2.534220856801502, "grad_norm": 0.23776708176038808, "learning_rate": 8.617855336368236e-06, "loss": 0.2333, "num_tokens": 1416191654.0, "step": 7425 }, { "epoch": 2.5345622119815667, "grad_norm": 0.23081479098030486, "learning_rate": 8.611532625189682e-06, "loss": 0.1989, "num_tokens": 1416379245.0, "step": 7426 }, { "epoch": 2.534903567161632, "grad_norm": 0.2378175464074311, "learning_rate": 8.605209914011129e-06, "loss": 0.2118, "num_tokens": 1416555512.0, "step": 7427 }, { "epoch": 2.5352449223416964, "grad_norm": 0.2272411524165371, "learning_rate": 8.598887202832576e-06, "loss": 0.2255, "num_tokens": 1416752056.0, "step": 7428 }, { "epoch": 2.5355862775217615, "grad_norm": 0.22744514301612134, "learning_rate": 8.592564491654023e-06, "loss": 0.2381, "num_tokens": 1416958778.0, "step": 7429 }, { "epoch": 2.535927632701826, "grad_norm": 0.22619008817309472, "learning_rate": 8.586241780475468e-06, "loss": 0.2267, "num_tokens": 1417186556.0, "step": 7430 }, { "epoch": 2.536268987881891, "grad_norm": 0.22658801227881964, "learning_rate": 8.579919069296915e-06, "loss": 0.2461, "num_tokens": 1417429556.0, "step": 7431 }, { "epoch": 2.536610343061956, "grad_norm": 0.21812795921324477, "learning_rate": 8.573596358118362e-06, "loss": 0.2107, "num_tokens": 1417634781.0, "step": 7432 }, { "epoch": 2.536951698242021, "grad_norm": 0.2271370602723919, "learning_rate": 8.567273646939807e-06, "loss": 0.2249, "num_tokens": 1417831123.0, "step": 7433 }, { "epoch": 2.5372930534220854, "grad_norm": 0.2453793756952493, "learning_rate": 8.560950935761256e-06, "loss": 0.2395, "num_tokens": 1418043373.0, "step": 7434 }, { "epoch": 2.5376344086021505, "grad_norm": 0.2885407180506331, "learning_rate": 8.554628224582701e-06, "loss": 0.2396, "num_tokens": 1418234051.0, "step": 7435 }, { "epoch": 2.5379757637822156, "grad_norm": 0.20345789734867573, "learning_rate": 8.548305513404148e-06, "loss": 0.2191, "num_tokens": 1418443398.0, "step": 7436 }, { "epoch": 2.53831711896228, "grad_norm": 0.2713816123566944, "learning_rate": 8.541982802225595e-06, "loss": 0.2219, "num_tokens": 1418597945.0, "step": 7437 }, { "epoch": 2.5386584741423452, "grad_norm": 0.25306949316809635, "learning_rate": 8.535660091047042e-06, "loss": 0.2357, "num_tokens": 1418785843.0, "step": 7438 }, { "epoch": 2.53899982932241, "grad_norm": 0.2380396586497148, "learning_rate": 8.529337379868487e-06, "loss": 0.2355, "num_tokens": 1418991297.0, "step": 7439 }, { "epoch": 2.539341184502475, "grad_norm": 0.2487927120828491, "learning_rate": 8.523014668689934e-06, "loss": 0.2456, "num_tokens": 1419166027.0, "step": 7440 }, { "epoch": 2.5396825396825395, "grad_norm": 0.22322730083585515, "learning_rate": 8.516691957511381e-06, "loss": 0.2332, "num_tokens": 1419401495.0, "step": 7441 }, { "epoch": 2.5400238948626046, "grad_norm": 0.25595817863813075, "learning_rate": 8.510369246332828e-06, "loss": 0.2215, "num_tokens": 1419587094.0, "step": 7442 }, { "epoch": 2.5403652500426697, "grad_norm": 0.2093533577836228, "learning_rate": 8.504046535154274e-06, "loss": 0.2118, "num_tokens": 1419801237.0, "step": 7443 }, { "epoch": 2.5407066052227343, "grad_norm": 0.22881144563267797, "learning_rate": 8.49772382397572e-06, "loss": 0.2242, "num_tokens": 1419987855.0, "step": 7444 }, { "epoch": 2.541047960402799, "grad_norm": 0.21868759358938392, "learning_rate": 8.491401112797168e-06, "loss": 0.2222, "num_tokens": 1420210102.0, "step": 7445 }, { "epoch": 2.541389315582864, "grad_norm": 0.23091787660132082, "learning_rate": 8.485078401618615e-06, "loss": 0.2339, "num_tokens": 1420414500.0, "step": 7446 }, { "epoch": 2.541730670762929, "grad_norm": 0.25951727571198024, "learning_rate": 8.478755690440062e-06, "loss": 0.2393, "num_tokens": 1420585678.0, "step": 7447 }, { "epoch": 2.5420720259429936, "grad_norm": 0.23029033362345605, "learning_rate": 8.472432979261507e-06, "loss": 0.227, "num_tokens": 1420800865.0, "step": 7448 }, { "epoch": 2.5424133811230587, "grad_norm": 0.24772846101502438, "learning_rate": 8.466110268082954e-06, "loss": 0.2445, "num_tokens": 1420999763.0, "step": 7449 }, { "epoch": 2.5427547363031233, "grad_norm": 0.2224083232543708, "learning_rate": 8.459787556904401e-06, "loss": 0.2205, "num_tokens": 1421188755.0, "step": 7450 }, { "epoch": 2.5430960914831884, "grad_norm": 0.2734262828300311, "learning_rate": 8.453464845725848e-06, "loss": 0.2429, "num_tokens": 1421367277.0, "step": 7451 }, { "epoch": 2.543437446663253, "grad_norm": 0.2493435527260524, "learning_rate": 8.447142134547293e-06, "loss": 0.209, "num_tokens": 1421534623.0, "step": 7452 }, { "epoch": 2.543778801843318, "grad_norm": 0.2550802862718348, "learning_rate": 8.440819423368742e-06, "loss": 0.2432, "num_tokens": 1421729908.0, "step": 7453 }, { "epoch": 2.5441201570233827, "grad_norm": 0.23392660856828992, "learning_rate": 8.434496712190187e-06, "loss": 0.2523, "num_tokens": 1421944880.0, "step": 7454 }, { "epoch": 2.5444615122034477, "grad_norm": 0.23666952739134822, "learning_rate": 8.428174001011634e-06, "loss": 0.2103, "num_tokens": 1422118392.0, "step": 7455 }, { "epoch": 2.5448028673835124, "grad_norm": 0.22492428289494926, "learning_rate": 8.421851289833081e-06, "loss": 0.2031, "num_tokens": 1422302618.0, "step": 7456 }, { "epoch": 2.5451442225635774, "grad_norm": 0.23406642760501398, "learning_rate": 8.415528578654528e-06, "loss": 0.2181, "num_tokens": 1422511428.0, "step": 7457 }, { "epoch": 2.5454855777436425, "grad_norm": 0.22851828304663413, "learning_rate": 8.409205867475973e-06, "loss": 0.221, "num_tokens": 1422709261.0, "step": 7458 }, { "epoch": 2.545826932923707, "grad_norm": 0.2338906660702443, "learning_rate": 8.40288315629742e-06, "loss": 0.2321, "num_tokens": 1422920716.0, "step": 7459 }, { "epoch": 2.5461682881037717, "grad_norm": 0.23798676285169157, "learning_rate": 8.396560445118867e-06, "loss": 0.2079, "num_tokens": 1423099347.0, "step": 7460 }, { "epoch": 2.5465096432838368, "grad_norm": 0.21227854953156564, "learning_rate": 8.390237733940314e-06, "loss": 0.2172, "num_tokens": 1423339301.0, "step": 7461 }, { "epoch": 2.546850998463902, "grad_norm": 0.22356942166505644, "learning_rate": 8.383915022761761e-06, "loss": 0.2324, "num_tokens": 1423553369.0, "step": 7462 }, { "epoch": 2.5471923536439665, "grad_norm": 0.21392680924978053, "learning_rate": 8.377592311583207e-06, "loss": 0.2209, "num_tokens": 1423753440.0, "step": 7463 }, { "epoch": 2.5475337088240315, "grad_norm": 0.1961048163015769, "learning_rate": 8.371269600404654e-06, "loss": 0.2198, "num_tokens": 1424005094.0, "step": 7464 }, { "epoch": 2.547875064004096, "grad_norm": 0.2300782150479333, "learning_rate": 8.3649468892261e-06, "loss": 0.2405, "num_tokens": 1424212889.0, "step": 7465 }, { "epoch": 2.548216419184161, "grad_norm": 0.25801499648356585, "learning_rate": 8.358624178047547e-06, "loss": 0.2029, "num_tokens": 1424371147.0, "step": 7466 }, { "epoch": 2.548557774364226, "grad_norm": 0.23730183289184154, "learning_rate": 8.352301466868993e-06, "loss": 0.2169, "num_tokens": 1424564959.0, "step": 7467 }, { "epoch": 2.548899129544291, "grad_norm": 0.24590125119335834, "learning_rate": 8.345978755690441e-06, "loss": 0.2021, "num_tokens": 1424730865.0, "step": 7468 }, { "epoch": 2.549240484724356, "grad_norm": 0.2520233940720295, "learning_rate": 8.339656044511887e-06, "loss": 0.2343, "num_tokens": 1424918380.0, "step": 7469 }, { "epoch": 2.5495818399044206, "grad_norm": 0.21707194228366616, "learning_rate": 8.333333333333334e-06, "loss": 0.2392, "num_tokens": 1425139027.0, "step": 7470 }, { "epoch": 2.549923195084485, "grad_norm": 0.2316911833935876, "learning_rate": 8.32701062215478e-06, "loss": 0.2213, "num_tokens": 1425331817.0, "step": 7471 }, { "epoch": 2.5502645502645502, "grad_norm": 0.19703864535044, "learning_rate": 8.320687910976228e-06, "loss": 0.2196, "num_tokens": 1425566840.0, "step": 7472 }, { "epoch": 2.5506059054446153, "grad_norm": 0.213387378378688, "learning_rate": 8.314365199797673e-06, "loss": 0.2409, "num_tokens": 1425805658.0, "step": 7473 }, { "epoch": 2.55094726062468, "grad_norm": 0.26152782876862507, "learning_rate": 8.30804248861912e-06, "loss": 0.2729, "num_tokens": 1425987769.0, "step": 7474 }, { "epoch": 2.551288615804745, "grad_norm": 0.2374824437845282, "learning_rate": 8.301719777440567e-06, "loss": 0.2012, "num_tokens": 1426188152.0, "step": 7475 }, { "epoch": 2.5516299709848096, "grad_norm": 0.23164488332107558, "learning_rate": 8.295397066262014e-06, "loss": 0.2291, "num_tokens": 1426368038.0, "step": 7476 }, { "epoch": 2.5519713261648747, "grad_norm": 0.23797424609573953, "learning_rate": 8.289074355083461e-06, "loss": 0.2168, "num_tokens": 1426549381.0, "step": 7477 }, { "epoch": 2.5523126813449393, "grad_norm": 0.24485983795158608, "learning_rate": 8.282751643904906e-06, "loss": 0.2263, "num_tokens": 1426742689.0, "step": 7478 }, { "epoch": 2.5526540365250043, "grad_norm": 0.2231001311794812, "learning_rate": 8.276428932726353e-06, "loss": 0.2388, "num_tokens": 1426948497.0, "step": 7479 }, { "epoch": 2.5529953917050694, "grad_norm": 0.2521206930051158, "learning_rate": 8.2701062215478e-06, "loss": 0.2166, "num_tokens": 1427116479.0, "step": 7480 }, { "epoch": 2.553336746885134, "grad_norm": 0.2625770393865816, "learning_rate": 8.263783510369247e-06, "loss": 0.203, "num_tokens": 1427272276.0, "step": 7481 }, { "epoch": 2.5536781020651986, "grad_norm": 0.24519187579904414, "learning_rate": 8.257460799190692e-06, "loss": 0.2376, "num_tokens": 1427460126.0, "step": 7482 }, { "epoch": 2.5540194572452637, "grad_norm": 0.2283878430239412, "learning_rate": 8.251138088012141e-06, "loss": 0.2454, "num_tokens": 1427656495.0, "step": 7483 }, { "epoch": 2.5543608124253288, "grad_norm": 0.23431897423932635, "learning_rate": 8.244815376833586e-06, "loss": 0.2333, "num_tokens": 1427835130.0, "step": 7484 }, { "epoch": 2.5547021676053934, "grad_norm": 0.229520624203557, "learning_rate": 8.238492665655033e-06, "loss": 0.2186, "num_tokens": 1428017690.0, "step": 7485 }, { "epoch": 2.5550435227854584, "grad_norm": 0.22220207170777706, "learning_rate": 8.23216995447648e-06, "loss": 0.2228, "num_tokens": 1428242619.0, "step": 7486 }, { "epoch": 2.555384877965523, "grad_norm": 0.23945938524609697, "learning_rate": 8.225847243297927e-06, "loss": 0.2284, "num_tokens": 1428447424.0, "step": 7487 }, { "epoch": 2.555726233145588, "grad_norm": 0.24315915831702975, "learning_rate": 8.219524532119373e-06, "loss": 0.217, "num_tokens": 1428618453.0, "step": 7488 }, { "epoch": 2.5560675883256527, "grad_norm": 0.22179135117862195, "learning_rate": 8.21320182094082e-06, "loss": 0.2086, "num_tokens": 1428800379.0, "step": 7489 }, { "epoch": 2.556408943505718, "grad_norm": 0.23987004130835146, "learning_rate": 8.206879109762267e-06, "loss": 0.2318, "num_tokens": 1428991787.0, "step": 7490 }, { "epoch": 2.5567502986857824, "grad_norm": 0.22446315049645799, "learning_rate": 8.200556398583714e-06, "loss": 0.2269, "num_tokens": 1429172378.0, "step": 7491 }, { "epoch": 2.5570916538658475, "grad_norm": 0.22271737571381436, "learning_rate": 8.19423368740516e-06, "loss": 0.2367, "num_tokens": 1429393545.0, "step": 7492 }, { "epoch": 2.557433009045912, "grad_norm": 0.23488802405019954, "learning_rate": 8.187910976226606e-06, "loss": 0.2259, "num_tokens": 1429584535.0, "step": 7493 }, { "epoch": 2.557774364225977, "grad_norm": 0.2651355393633054, "learning_rate": 8.181588265048053e-06, "loss": 0.2352, "num_tokens": 1429742833.0, "step": 7494 }, { "epoch": 2.558115719406042, "grad_norm": 0.22359337394289525, "learning_rate": 8.1752655538695e-06, "loss": 0.247, "num_tokens": 1429958435.0, "step": 7495 }, { "epoch": 2.558457074586107, "grad_norm": 0.20780179609887942, "learning_rate": 8.168942842690947e-06, "loss": 0.229, "num_tokens": 1430181737.0, "step": 7496 }, { "epoch": 2.5587984297661714, "grad_norm": 0.2275702080999997, "learning_rate": 8.162620131512392e-06, "loss": 0.2199, "num_tokens": 1430391935.0, "step": 7497 }, { "epoch": 2.5591397849462365, "grad_norm": 0.2254198606239327, "learning_rate": 8.15629742033384e-06, "loss": 0.2258, "num_tokens": 1430603602.0, "step": 7498 }, { "epoch": 2.5594811401263016, "grad_norm": 0.23042183891469642, "learning_rate": 8.149974709155286e-06, "loss": 0.2418, "num_tokens": 1430811282.0, "step": 7499 }, { "epoch": 2.559822495306366, "grad_norm": 0.22910984108923813, "learning_rate": 8.143651997976733e-06, "loss": 0.2298, "num_tokens": 1431024901.0, "step": 7500 }, { "epoch": 2.5601638504864312, "grad_norm": 0.24560882075609194, "learning_rate": 8.13732928679818e-06, "loss": 0.2285, "num_tokens": 1431199957.0, "step": 7501 }, { "epoch": 2.560505205666496, "grad_norm": 0.2295887295806669, "learning_rate": 8.131006575619627e-06, "loss": 0.2415, "num_tokens": 1431409686.0, "step": 7502 }, { "epoch": 2.560846560846561, "grad_norm": 0.2398001413507399, "learning_rate": 8.124683864441072e-06, "loss": 0.2064, "num_tokens": 1431579103.0, "step": 7503 }, { "epoch": 2.5611879160266255, "grad_norm": 0.26214680219541103, "learning_rate": 8.11836115326252e-06, "loss": 0.2392, "num_tokens": 1431739567.0, "step": 7504 }, { "epoch": 2.5615292712066906, "grad_norm": 0.2270394613911605, "learning_rate": 8.112038442083966e-06, "loss": 0.2219, "num_tokens": 1431912593.0, "step": 7505 }, { "epoch": 2.5618706263867557, "grad_norm": 0.2443231281684172, "learning_rate": 8.105715730905413e-06, "loss": 0.229, "num_tokens": 1432107496.0, "step": 7506 }, { "epoch": 2.5622119815668203, "grad_norm": 0.2786880362909713, "learning_rate": 8.099393019726859e-06, "loss": 0.243, "num_tokens": 1432256267.0, "step": 7507 }, { "epoch": 2.562553336746885, "grad_norm": 0.2156036865745452, "learning_rate": 8.093070308548306e-06, "loss": 0.2313, "num_tokens": 1432474532.0, "step": 7508 }, { "epoch": 2.56289469192695, "grad_norm": 0.23608688089394825, "learning_rate": 8.086747597369753e-06, "loss": 0.2167, "num_tokens": 1432660622.0, "step": 7509 }, { "epoch": 2.563236047107015, "grad_norm": 0.21599500209016922, "learning_rate": 8.080424886191198e-06, "loss": 0.2257, "num_tokens": 1432875898.0, "step": 7510 }, { "epoch": 2.5635774022870796, "grad_norm": 0.21122825181136995, "learning_rate": 8.074102175012646e-06, "loss": 0.219, "num_tokens": 1433103540.0, "step": 7511 }, { "epoch": 2.5639187574671447, "grad_norm": 0.24338031998716952, "learning_rate": 8.067779463834092e-06, "loss": 0.2478, "num_tokens": 1433290161.0, "step": 7512 }, { "epoch": 2.5642601126472093, "grad_norm": 0.2391820126659091, "learning_rate": 8.061456752655539e-06, "loss": 0.2116, "num_tokens": 1433465800.0, "step": 7513 }, { "epoch": 2.5646014678272744, "grad_norm": 0.2329452344474514, "learning_rate": 8.055134041476986e-06, "loss": 0.2435, "num_tokens": 1433680833.0, "step": 7514 }, { "epoch": 2.564942823007339, "grad_norm": 0.24396069802506964, "learning_rate": 8.048811330298433e-06, "loss": 0.2463, "num_tokens": 1433867132.0, "step": 7515 }, { "epoch": 2.565284178187404, "grad_norm": 0.22520827097162127, "learning_rate": 8.042488619119878e-06, "loss": 0.2075, "num_tokens": 1434060517.0, "step": 7516 }, { "epoch": 2.565625533367469, "grad_norm": 0.2556934500113873, "learning_rate": 8.036165907941327e-06, "loss": 0.2552, "num_tokens": 1434245189.0, "step": 7517 }, { "epoch": 2.5659668885475337, "grad_norm": 0.22927534453645526, "learning_rate": 8.029843196762772e-06, "loss": 0.2185, "num_tokens": 1434456474.0, "step": 7518 }, { "epoch": 2.5663082437275984, "grad_norm": 0.23645693754961947, "learning_rate": 8.023520485584219e-06, "loss": 0.2167, "num_tokens": 1434641995.0, "step": 7519 }, { "epoch": 2.5666495989076634, "grad_norm": 0.23380669019230965, "learning_rate": 8.017197774405666e-06, "loss": 0.2255, "num_tokens": 1434823666.0, "step": 7520 }, { "epoch": 2.5669909540877285, "grad_norm": 0.272577565050044, "learning_rate": 8.010875063227111e-06, "loss": 0.213, "num_tokens": 1434981351.0, "step": 7521 }, { "epoch": 2.567332309267793, "grad_norm": 0.21363780702420265, "learning_rate": 8.004552352048558e-06, "loss": 0.2434, "num_tokens": 1435213574.0, "step": 7522 }, { "epoch": 2.567673664447858, "grad_norm": 0.23589050151135466, "learning_rate": 7.998229640870005e-06, "loss": 0.2371, "num_tokens": 1435413783.0, "step": 7523 }, { "epoch": 2.568015019627923, "grad_norm": 0.22029152269528918, "learning_rate": 7.991906929691452e-06, "loss": 0.2249, "num_tokens": 1435606723.0, "step": 7524 }, { "epoch": 2.568356374807988, "grad_norm": 0.245856983838737, "learning_rate": 7.985584218512897e-06, "loss": 0.2282, "num_tokens": 1435782921.0, "step": 7525 }, { "epoch": 2.5686977299880525, "grad_norm": 0.23894030556782772, "learning_rate": 7.979261507334346e-06, "loss": 0.2178, "num_tokens": 1435971183.0, "step": 7526 }, { "epoch": 2.5690390851681175, "grad_norm": 0.21685360386716312, "learning_rate": 7.972938796155791e-06, "loss": 0.2392, "num_tokens": 1436189096.0, "step": 7527 }, { "epoch": 2.569380440348182, "grad_norm": 0.23822026860358056, "learning_rate": 7.966616084977238e-06, "loss": 0.219, "num_tokens": 1436381760.0, "step": 7528 }, { "epoch": 2.569721795528247, "grad_norm": 0.24692758698722858, "learning_rate": 7.960293373798685e-06, "loss": 0.2273, "num_tokens": 1436568233.0, "step": 7529 }, { "epoch": 2.570063150708312, "grad_norm": 0.28596892417375513, "learning_rate": 7.953970662620132e-06, "loss": 0.2159, "num_tokens": 1436716569.0, "step": 7530 }, { "epoch": 2.570404505888377, "grad_norm": 0.24462127203598774, "learning_rate": 7.947647951441578e-06, "loss": 0.2463, "num_tokens": 1436923502.0, "step": 7531 }, { "epoch": 2.570745861068442, "grad_norm": 0.24515535061776345, "learning_rate": 7.941325240263026e-06, "loss": 0.2313, "num_tokens": 1437111929.0, "step": 7532 }, { "epoch": 2.5710872162485066, "grad_norm": 0.23074104255198866, "learning_rate": 7.935002529084472e-06, "loss": 0.2364, "num_tokens": 1437298249.0, "step": 7533 }, { "epoch": 2.571428571428571, "grad_norm": 0.24449495328377388, "learning_rate": 7.928679817905919e-06, "loss": 0.1948, "num_tokens": 1437461299.0, "step": 7534 }, { "epoch": 2.5717699266086362, "grad_norm": 0.22178267050522374, "learning_rate": 7.922357106727366e-06, "loss": 0.2308, "num_tokens": 1437662816.0, "step": 7535 }, { "epoch": 2.5721112817887013, "grad_norm": 0.2422917912915303, "learning_rate": 7.916034395548811e-06, "loss": 0.2354, "num_tokens": 1437857944.0, "step": 7536 }, { "epoch": 2.572452636968766, "grad_norm": 0.246855905555391, "learning_rate": 7.909711684370258e-06, "loss": 0.2245, "num_tokens": 1438046000.0, "step": 7537 }, { "epoch": 2.572793992148831, "grad_norm": 0.23633128375429446, "learning_rate": 7.903388973191705e-06, "loss": 0.2467, "num_tokens": 1438245689.0, "step": 7538 }, { "epoch": 2.5731353473288956, "grad_norm": 0.26456727060529084, "learning_rate": 7.897066262013152e-06, "loss": 0.2167, "num_tokens": 1438417499.0, "step": 7539 }, { "epoch": 2.5734767025089607, "grad_norm": 0.22628434909906042, "learning_rate": 7.890743550834597e-06, "loss": 0.2209, "num_tokens": 1438607500.0, "step": 7540 }, { "epoch": 2.5738180576890253, "grad_norm": 0.245279012454754, "learning_rate": 7.884420839656046e-06, "loss": 0.2448, "num_tokens": 1438816833.0, "step": 7541 }, { "epoch": 2.5741594128690903, "grad_norm": 0.2562518345486507, "learning_rate": 7.878098128477491e-06, "loss": 0.2191, "num_tokens": 1438972100.0, "step": 7542 }, { "epoch": 2.5745007680491554, "grad_norm": 0.24516656873215703, "learning_rate": 7.871775417298938e-06, "loss": 0.2186, "num_tokens": 1439139806.0, "step": 7543 }, { "epoch": 2.57484212322922, "grad_norm": 0.23068504747140078, "learning_rate": 7.865452706120385e-06, "loss": 0.2302, "num_tokens": 1439356195.0, "step": 7544 }, { "epoch": 2.5751834784092846, "grad_norm": 0.24641569262977192, "learning_rate": 7.859129994941832e-06, "loss": 0.2117, "num_tokens": 1439528074.0, "step": 7545 }, { "epoch": 2.5755248335893497, "grad_norm": 0.23256920993517488, "learning_rate": 7.852807283763277e-06, "loss": 0.2314, "num_tokens": 1439724607.0, "step": 7546 }, { "epoch": 2.5758661887694148, "grad_norm": 0.24804186466193912, "learning_rate": 7.846484572584726e-06, "loss": 0.2506, "num_tokens": 1439910898.0, "step": 7547 }, { "epoch": 2.5762075439494794, "grad_norm": 0.21094283302929856, "learning_rate": 7.840161861406171e-06, "loss": 0.2709, "num_tokens": 1440146814.0, "step": 7548 }, { "epoch": 2.5765488991295444, "grad_norm": 0.23075195218048555, "learning_rate": 7.833839150227618e-06, "loss": 0.2219, "num_tokens": 1440341832.0, "step": 7549 }, { "epoch": 2.576890254309609, "grad_norm": 0.26993304414678015, "learning_rate": 7.827516439049065e-06, "loss": 0.1805, "num_tokens": 1440461265.0, "step": 7550 }, { "epoch": 2.577231609489674, "grad_norm": 0.21511574328299432, "learning_rate": 7.82119372787051e-06, "loss": 0.2117, "num_tokens": 1440648210.0, "step": 7551 }, { "epoch": 2.5775729646697387, "grad_norm": 0.23460513357581245, "learning_rate": 7.814871016691958e-06, "loss": 0.2563, "num_tokens": 1440866050.0, "step": 7552 }, { "epoch": 2.577914319849804, "grad_norm": 0.21836689035055887, "learning_rate": 7.808548305513405e-06, "loss": 0.2096, "num_tokens": 1441073474.0, "step": 7553 }, { "epoch": 2.578255675029869, "grad_norm": 0.2532356512081511, "learning_rate": 7.802225594334852e-06, "loss": 0.2287, "num_tokens": 1441243339.0, "step": 7554 }, { "epoch": 2.5785970302099335, "grad_norm": 0.24567259534162159, "learning_rate": 7.795902883156297e-06, "loss": 0.2154, "num_tokens": 1441418810.0, "step": 7555 }, { "epoch": 2.578938385389998, "grad_norm": 0.24250858704619874, "learning_rate": 7.789580171977745e-06, "loss": 0.2373, "num_tokens": 1441631027.0, "step": 7556 }, { "epoch": 2.579279740570063, "grad_norm": 0.2415925857995795, "learning_rate": 7.78325746079919e-06, "loss": 0.2144, "num_tokens": 1441814652.0, "step": 7557 }, { "epoch": 2.579621095750128, "grad_norm": 0.2358406519101496, "learning_rate": 7.776934749620638e-06, "loss": 0.2016, "num_tokens": 1441992087.0, "step": 7558 }, { "epoch": 2.579962450930193, "grad_norm": 0.21830408222362852, "learning_rate": 7.770612038442085e-06, "loss": 0.223, "num_tokens": 1442225654.0, "step": 7559 }, { "epoch": 2.5803038061102574, "grad_norm": 0.243070537985507, "learning_rate": 7.764289327263532e-06, "loss": 0.2533, "num_tokens": 1442429957.0, "step": 7560 }, { "epoch": 2.5806451612903225, "grad_norm": 0.21310869783147177, "learning_rate": 7.757966616084977e-06, "loss": 0.2002, "num_tokens": 1442630848.0, "step": 7561 }, { "epoch": 2.5809865164703876, "grad_norm": 0.23290981450644357, "learning_rate": 7.751643904906426e-06, "loss": 0.2205, "num_tokens": 1442804386.0, "step": 7562 }, { "epoch": 2.581327871650452, "grad_norm": 0.2683401328444558, "learning_rate": 7.745321193727871e-06, "loss": 0.2061, "num_tokens": 1442946103.0, "step": 7563 }, { "epoch": 2.5816692268305172, "grad_norm": 0.21881159536686184, "learning_rate": 7.738998482549318e-06, "loss": 0.2536, "num_tokens": 1443169780.0, "step": 7564 }, { "epoch": 2.582010582010582, "grad_norm": 0.23335528299690858, "learning_rate": 7.732675771370765e-06, "loss": 0.2364, "num_tokens": 1443381915.0, "step": 7565 }, { "epoch": 2.582351937190647, "grad_norm": 0.2386999494895981, "learning_rate": 7.72635306019221e-06, "loss": 0.2245, "num_tokens": 1443559119.0, "step": 7566 }, { "epoch": 2.5826932923707115, "grad_norm": 0.23298147238905195, "learning_rate": 7.720030349013657e-06, "loss": 0.2271, "num_tokens": 1443763494.0, "step": 7567 }, { "epoch": 2.5830346475507766, "grad_norm": 0.2630246516972271, "learning_rate": 7.713707637835104e-06, "loss": 0.236, "num_tokens": 1443940120.0, "step": 7568 }, { "epoch": 2.5833760027308417, "grad_norm": 0.2295554504168893, "learning_rate": 7.707384926656551e-06, "loss": 0.2349, "num_tokens": 1444121573.0, "step": 7569 }, { "epoch": 2.5837173579109063, "grad_norm": 0.2345662720952931, "learning_rate": 7.701062215477996e-06, "loss": 0.2295, "num_tokens": 1444301936.0, "step": 7570 }, { "epoch": 2.584058713090971, "grad_norm": 0.2359330226122445, "learning_rate": 7.694739504299443e-06, "loss": 0.2224, "num_tokens": 1444484816.0, "step": 7571 }, { "epoch": 2.584400068271036, "grad_norm": 0.2305384394873012, "learning_rate": 7.68841679312089e-06, "loss": 0.2286, "num_tokens": 1444688105.0, "step": 7572 }, { "epoch": 2.584741423451101, "grad_norm": 0.2313036186321746, "learning_rate": 7.682094081942337e-06, "loss": 0.2263, "num_tokens": 1444889403.0, "step": 7573 }, { "epoch": 2.5850827786311656, "grad_norm": 0.23201984729233857, "learning_rate": 7.675771370763783e-06, "loss": 0.2498, "num_tokens": 1445118480.0, "step": 7574 }, { "epoch": 2.5854241338112307, "grad_norm": 0.2370941418980956, "learning_rate": 7.669448659585231e-06, "loss": 0.2139, "num_tokens": 1445291148.0, "step": 7575 }, { "epoch": 2.5857654889912953, "grad_norm": 0.23228409727706997, "learning_rate": 7.663125948406677e-06, "loss": 0.247, "num_tokens": 1445508365.0, "step": 7576 }, { "epoch": 2.5861068441713604, "grad_norm": 0.23460311565312075, "learning_rate": 7.656803237228124e-06, "loss": 0.2431, "num_tokens": 1445723651.0, "step": 7577 }, { "epoch": 2.586448199351425, "grad_norm": 0.23470378948456988, "learning_rate": 7.65048052604957e-06, "loss": 0.2071, "num_tokens": 1445885159.0, "step": 7578 }, { "epoch": 2.58678955453149, "grad_norm": 0.24007859044066515, "learning_rate": 7.644157814871018e-06, "loss": 0.239, "num_tokens": 1446070667.0, "step": 7579 }, { "epoch": 2.587130909711555, "grad_norm": 0.21001265483221015, "learning_rate": 7.637835103692463e-06, "loss": 0.2345, "num_tokens": 1446290980.0, "step": 7580 }, { "epoch": 2.5874722648916197, "grad_norm": 0.22110150710284512, "learning_rate": 7.63151239251391e-06, "loss": 0.2252, "num_tokens": 1446487655.0, "step": 7581 }, { "epoch": 2.5878136200716844, "grad_norm": 0.25457406139757816, "learning_rate": 7.625189681335357e-06, "loss": 0.1981, "num_tokens": 1446627982.0, "step": 7582 }, { "epoch": 2.5881549752517494, "grad_norm": 0.23743682463455001, "learning_rate": 7.618866970156803e-06, "loss": 0.244, "num_tokens": 1446847967.0, "step": 7583 }, { "epoch": 2.5884963304318145, "grad_norm": 0.26271400220793334, "learning_rate": 7.612544258978251e-06, "loss": 0.2319, "num_tokens": 1447045013.0, "step": 7584 }, { "epoch": 2.588837685611879, "grad_norm": 0.2599810663228084, "learning_rate": 7.606221547799697e-06, "loss": 0.2248, "num_tokens": 1447205882.0, "step": 7585 }, { "epoch": 2.589179040791944, "grad_norm": 0.2414214242113546, "learning_rate": 7.599898836621143e-06, "loss": 0.2261, "num_tokens": 1447374605.0, "step": 7586 }, { "epoch": 2.589520395972009, "grad_norm": 0.24843532887910622, "learning_rate": 7.59357612544259e-06, "loss": 0.2383, "num_tokens": 1447569681.0, "step": 7587 }, { "epoch": 2.589861751152074, "grad_norm": 0.24289620058315955, "learning_rate": 7.587253414264037e-06, "loss": 0.2346, "num_tokens": 1447775496.0, "step": 7588 }, { "epoch": 2.5902031063321385, "grad_norm": 0.2287968457217813, "learning_rate": 7.580930703085483e-06, "loss": 0.2412, "num_tokens": 1447989767.0, "step": 7589 }, { "epoch": 2.5905444615122035, "grad_norm": 0.24606519217988246, "learning_rate": 7.57460799190693e-06, "loss": 0.2302, "num_tokens": 1448169289.0, "step": 7590 }, { "epoch": 2.590885816692268, "grad_norm": 0.2625320912212878, "learning_rate": 7.568285280728376e-06, "loss": 0.2342, "num_tokens": 1448342290.0, "step": 7591 }, { "epoch": 2.591227171872333, "grad_norm": 0.22271286649524466, "learning_rate": 7.5619625695498225e-06, "loss": 0.2042, "num_tokens": 1448509598.0, "step": 7592 }, { "epoch": 2.591568527052398, "grad_norm": 0.2372025661914298, "learning_rate": 7.55563985837127e-06, "loss": 0.2392, "num_tokens": 1448728020.0, "step": 7593 }, { "epoch": 2.591909882232463, "grad_norm": 0.22651856882332086, "learning_rate": 7.5493171471927164e-06, "loss": 0.2334, "num_tokens": 1448946712.0, "step": 7594 }, { "epoch": 2.592251237412528, "grad_norm": 0.21836210038558407, "learning_rate": 7.5429944360141626e-06, "loss": 0.2223, "num_tokens": 1449169731.0, "step": 7595 }, { "epoch": 2.5925925925925926, "grad_norm": 0.2404786725084749, "learning_rate": 7.53667172483561e-06, "loss": 0.2018, "num_tokens": 1449326135.0, "step": 7596 }, { "epoch": 2.592933947772657, "grad_norm": 0.23145330126939276, "learning_rate": 7.5303490136570565e-06, "loss": 0.214, "num_tokens": 1449514913.0, "step": 7597 }, { "epoch": 2.5932753029527222, "grad_norm": 0.22678748088341255, "learning_rate": 7.524026302478503e-06, "loss": 0.2507, "num_tokens": 1449737260.0, "step": 7598 }, { "epoch": 2.5936166581327873, "grad_norm": 0.24233692821445849, "learning_rate": 7.5177035912999505e-06, "loss": 0.2248, "num_tokens": 1449899501.0, "step": 7599 }, { "epoch": 2.593958013312852, "grad_norm": 0.22271573969016778, "learning_rate": 7.511380880121397e-06, "loss": 0.2009, "num_tokens": 1450082185.0, "step": 7600 }, { "epoch": 2.594299368492917, "grad_norm": 0.21783181903838092, "learning_rate": 7.505058168942843e-06, "loss": 0.2154, "num_tokens": 1450267594.0, "step": 7601 }, { "epoch": 2.5946407236729816, "grad_norm": 0.2282926453661712, "learning_rate": 7.49873545776429e-06, "loss": 0.2436, "num_tokens": 1450459931.0, "step": 7602 }, { "epoch": 2.5949820788530467, "grad_norm": 0.23446026953413476, "learning_rate": 7.492412746585737e-06, "loss": 0.2096, "num_tokens": 1450637085.0, "step": 7603 }, { "epoch": 2.5953234340331113, "grad_norm": 0.23082260321011142, "learning_rate": 7.486090035407183e-06, "loss": 0.247, "num_tokens": 1450825940.0, "step": 7604 }, { "epoch": 2.5956647892131763, "grad_norm": 0.20776340708972682, "learning_rate": 7.47976732422863e-06, "loss": 0.2141, "num_tokens": 1451035470.0, "step": 7605 }, { "epoch": 2.5960061443932414, "grad_norm": 0.26157162667441397, "learning_rate": 7.473444613050076e-06, "loss": 0.2444, "num_tokens": 1451196945.0, "step": 7606 }, { "epoch": 2.596347499573306, "grad_norm": 0.24523249847876352, "learning_rate": 7.467121901871522e-06, "loss": 0.2114, "num_tokens": 1451368911.0, "step": 7607 }, { "epoch": 2.5966888547533706, "grad_norm": 0.2351612326548893, "learning_rate": 7.46079919069297e-06, "loss": 0.2396, "num_tokens": 1451577705.0, "step": 7608 }, { "epoch": 2.5970302099334357, "grad_norm": 0.23242726617065718, "learning_rate": 7.454476479514416e-06, "loss": 0.2365, "num_tokens": 1451770579.0, "step": 7609 }, { "epoch": 2.5973715651135008, "grad_norm": 0.2710805289579722, "learning_rate": 7.448153768335862e-06, "loss": 0.2026, "num_tokens": 1451911384.0, "step": 7610 }, { "epoch": 2.5977129202935654, "grad_norm": 0.2653356991948967, "learning_rate": 7.44183105715731e-06, "loss": 0.2378, "num_tokens": 1452093133.0, "step": 7611 }, { "epoch": 2.5980542754736304, "grad_norm": 0.22182129884838417, "learning_rate": 7.435508345978756e-06, "loss": 0.2148, "num_tokens": 1452285236.0, "step": 7612 }, { "epoch": 2.598395630653695, "grad_norm": 0.27194764298732604, "learning_rate": 7.429185634800202e-06, "loss": 0.2394, "num_tokens": 1452481186.0, "step": 7613 }, { "epoch": 2.59873698583376, "grad_norm": 0.24740346757159176, "learning_rate": 7.42286292362165e-06, "loss": 0.2498, "num_tokens": 1452666761.0, "step": 7614 }, { "epoch": 2.5990783410138247, "grad_norm": 0.23622505139795663, "learning_rate": 7.416540212443096e-06, "loss": 0.2306, "num_tokens": 1452831330.0, "step": 7615 }, { "epoch": 2.59941969619389, "grad_norm": 0.24992396139870213, "learning_rate": 7.4102175012645424e-06, "loss": 0.233, "num_tokens": 1453012091.0, "step": 7616 }, { "epoch": 2.599761051373955, "grad_norm": 0.20850143135849206, "learning_rate": 7.403894790085989e-06, "loss": 0.2289, "num_tokens": 1453217567.0, "step": 7617 }, { "epoch": 2.6001024065540195, "grad_norm": 0.23552281457575763, "learning_rate": 7.3975720789074356e-06, "loss": 0.2437, "num_tokens": 1453416181.0, "step": 7618 }, { "epoch": 2.600443761734084, "grad_norm": 0.22866167060774514, "learning_rate": 7.3912493677288825e-06, "loss": 0.2321, "num_tokens": 1453630981.0, "step": 7619 }, { "epoch": 2.600785116914149, "grad_norm": 0.23827136166930254, "learning_rate": 7.3849266565503295e-06, "loss": 0.2516, "num_tokens": 1453831906.0, "step": 7620 }, { "epoch": 2.601126472094214, "grad_norm": 0.23481041944564998, "learning_rate": 7.378603945371776e-06, "loss": 0.2107, "num_tokens": 1453996245.0, "step": 7621 }, { "epoch": 2.601467827274279, "grad_norm": 0.22833238942035594, "learning_rate": 7.372281234193222e-06, "loss": 0.2331, "num_tokens": 1454200083.0, "step": 7622 }, { "epoch": 2.601809182454344, "grad_norm": 0.26328399705595684, "learning_rate": 7.36595852301467e-06, "loss": 0.2361, "num_tokens": 1454349201.0, "step": 7623 }, { "epoch": 2.6021505376344085, "grad_norm": 0.22156208593225002, "learning_rate": 7.359635811836116e-06, "loss": 0.2154, "num_tokens": 1454545220.0, "step": 7624 }, { "epoch": 2.6024918928144736, "grad_norm": 0.22129660490839834, "learning_rate": 7.353313100657562e-06, "loss": 0.261, "num_tokens": 1454788842.0, "step": 7625 }, { "epoch": 2.602833247994538, "grad_norm": 0.22967330017386203, "learning_rate": 7.34699038947901e-06, "loss": 0.2297, "num_tokens": 1454986209.0, "step": 7626 }, { "epoch": 2.6031746031746033, "grad_norm": 0.25362879442847686, "learning_rate": 7.340667678300456e-06, "loss": 0.2206, "num_tokens": 1455151236.0, "step": 7627 }, { "epoch": 2.603515958354668, "grad_norm": 0.19630620259628118, "learning_rate": 7.334344967121902e-06, "loss": 0.2242, "num_tokens": 1455395090.0, "step": 7628 }, { "epoch": 2.603857313534733, "grad_norm": 0.22592930479286125, "learning_rate": 7.32802225594335e-06, "loss": 0.2312, "num_tokens": 1455610744.0, "step": 7629 }, { "epoch": 2.6041986687147975, "grad_norm": 0.22503508983316478, "learning_rate": 7.321699544764796e-06, "loss": 0.2192, "num_tokens": 1455823176.0, "step": 7630 }, { "epoch": 2.6045400238948626, "grad_norm": 0.25312738972094884, "learning_rate": 7.315376833586242e-06, "loss": 0.2061, "num_tokens": 1455981178.0, "step": 7631 }, { "epoch": 2.6048813790749277, "grad_norm": 0.25984735873507736, "learning_rate": 7.309054122407689e-06, "loss": 0.2416, "num_tokens": 1456160626.0, "step": 7632 }, { "epoch": 2.6052227342549923, "grad_norm": 0.2701671237931706, "learning_rate": 7.302731411229135e-06, "loss": 0.2172, "num_tokens": 1456337572.0, "step": 7633 }, { "epoch": 2.605564089435057, "grad_norm": 0.24952088147016566, "learning_rate": 7.296408700050582e-06, "loss": 0.236, "num_tokens": 1456549906.0, "step": 7634 }, { "epoch": 2.605905444615122, "grad_norm": 0.23916233061151307, "learning_rate": 7.290085988872028e-06, "loss": 0.2131, "num_tokens": 1456738147.0, "step": 7635 }, { "epoch": 2.606246799795187, "grad_norm": 0.2508332954177531, "learning_rate": 7.283763277693475e-06, "loss": 0.2182, "num_tokens": 1456911509.0, "step": 7636 }, { "epoch": 2.6065881549752516, "grad_norm": 0.2272020804579338, "learning_rate": 7.2774405665149215e-06, "loss": 0.2321, "num_tokens": 1457136946.0, "step": 7637 }, { "epoch": 2.6069295101553167, "grad_norm": 0.24778617469094294, "learning_rate": 7.271117855336368e-06, "loss": 0.2446, "num_tokens": 1457318396.0, "step": 7638 }, { "epoch": 2.6072708653353813, "grad_norm": 0.2289836862080525, "learning_rate": 7.2647951441578154e-06, "loss": 0.2038, "num_tokens": 1457494002.0, "step": 7639 }, { "epoch": 2.6076122205154464, "grad_norm": 0.2553982907327543, "learning_rate": 7.2584724329792616e-06, "loss": 0.226, "num_tokens": 1457645328.0, "step": 7640 }, { "epoch": 2.607953575695511, "grad_norm": 0.24724574928655652, "learning_rate": 7.252149721800708e-06, "loss": 0.2437, "num_tokens": 1457837369.0, "step": 7641 }, { "epoch": 2.608294930875576, "grad_norm": 0.22959407332721787, "learning_rate": 7.2458270106221555e-06, "loss": 0.2161, "num_tokens": 1458040820.0, "step": 7642 }, { "epoch": 2.608636286055641, "grad_norm": 0.226627816739441, "learning_rate": 7.239504299443602e-06, "loss": 0.2295, "num_tokens": 1458250467.0, "step": 7643 }, { "epoch": 2.6089776412357057, "grad_norm": 0.24003130672551215, "learning_rate": 7.233181588265048e-06, "loss": 0.2186, "num_tokens": 1458422675.0, "step": 7644 }, { "epoch": 2.6093189964157704, "grad_norm": 0.2549515567103769, "learning_rate": 7.226858877086496e-06, "loss": 0.2334, "num_tokens": 1458602954.0, "step": 7645 }, { "epoch": 2.6096603515958354, "grad_norm": 0.24723872232078906, "learning_rate": 7.220536165907942e-06, "loss": 0.2106, "num_tokens": 1458769739.0, "step": 7646 }, { "epoch": 2.6100017067759005, "grad_norm": 0.24176816032519988, "learning_rate": 7.214213454729388e-06, "loss": 0.2151, "num_tokens": 1458958742.0, "step": 7647 }, { "epoch": 2.610343061955965, "grad_norm": 0.24493028866974675, "learning_rate": 7.207890743550835e-06, "loss": 0.2146, "num_tokens": 1459120008.0, "step": 7648 }, { "epoch": 2.61068441713603, "grad_norm": 0.23996696818221852, "learning_rate": 7.201568032372282e-06, "loss": 0.224, "num_tokens": 1459300675.0, "step": 7649 }, { "epoch": 2.611025772316095, "grad_norm": 0.24132774809303525, "learning_rate": 7.195245321193728e-06, "loss": 0.2304, "num_tokens": 1459513861.0, "step": 7650 }, { "epoch": 2.61136712749616, "grad_norm": 0.2652425861663693, "learning_rate": 7.188922610015175e-06, "loss": 0.2162, "num_tokens": 1459663127.0, "step": 7651 }, { "epoch": 2.6117084826762245, "grad_norm": 0.24511313357343376, "learning_rate": 7.182599898836621e-06, "loss": 0.2234, "num_tokens": 1459845848.0, "step": 7652 }, { "epoch": 2.6120498378562895, "grad_norm": 0.23828271215727934, "learning_rate": 7.176277187658067e-06, "loss": 0.2137, "num_tokens": 1460012157.0, "step": 7653 }, { "epoch": 2.6123911930363546, "grad_norm": 0.2258405141912513, "learning_rate": 7.169954476479515e-06, "loss": 0.2042, "num_tokens": 1460194517.0, "step": 7654 }, { "epoch": 2.612732548216419, "grad_norm": 0.2201146934065171, "learning_rate": 7.163631765300961e-06, "loss": 0.2251, "num_tokens": 1460403055.0, "step": 7655 }, { "epoch": 2.613073903396484, "grad_norm": 0.239103251109779, "learning_rate": 7.157309054122407e-06, "loss": 0.2347, "num_tokens": 1460595667.0, "step": 7656 }, { "epoch": 2.613415258576549, "grad_norm": 0.22480686225367497, "learning_rate": 7.150986342943855e-06, "loss": 0.2247, "num_tokens": 1460796345.0, "step": 7657 }, { "epoch": 2.613756613756614, "grad_norm": 0.23101096855574393, "learning_rate": 7.144663631765301e-06, "loss": 0.205, "num_tokens": 1460978660.0, "step": 7658 }, { "epoch": 2.6140979689366786, "grad_norm": 0.21625403076453495, "learning_rate": 7.1383409205867475e-06, "loss": 0.2275, "num_tokens": 1461183536.0, "step": 7659 }, { "epoch": 2.6144393241167436, "grad_norm": 0.20567989637922837, "learning_rate": 7.132018209408195e-06, "loss": 0.22, "num_tokens": 1461410221.0, "step": 7660 }, { "epoch": 2.6147806792968082, "grad_norm": 0.24231516517556248, "learning_rate": 7.1256954982296414e-06, "loss": 0.2225, "num_tokens": 1461598408.0, "step": 7661 }, { "epoch": 2.6151220344768733, "grad_norm": 0.2211463260832436, "learning_rate": 7.1193727870510876e-06, "loss": 0.2341, "num_tokens": 1461811839.0, "step": 7662 }, { "epoch": 2.615463389656938, "grad_norm": 0.24704157720139938, "learning_rate": 7.1130500758725345e-06, "loss": 0.2682, "num_tokens": 1462031877.0, "step": 7663 }, { "epoch": 2.615804744837003, "grad_norm": 0.2549540280801847, "learning_rate": 7.106727364693981e-06, "loss": 0.215, "num_tokens": 1462197389.0, "step": 7664 }, { "epoch": 2.6161461000170676, "grad_norm": 0.21990788918588572, "learning_rate": 7.100404653515428e-06, "loss": 0.21, "num_tokens": 1462385776.0, "step": 7665 }, { "epoch": 2.6164874551971327, "grad_norm": 0.22949722581611104, "learning_rate": 7.094081942336875e-06, "loss": 0.2136, "num_tokens": 1462546891.0, "step": 7666 }, { "epoch": 2.6168288103771973, "grad_norm": 0.23455381944624856, "learning_rate": 7.087759231158321e-06, "loss": 0.2354, "num_tokens": 1462729421.0, "step": 7667 }, { "epoch": 2.6171701655572623, "grad_norm": 0.24908644108446934, "learning_rate": 7.081436519979767e-06, "loss": 0.2468, "num_tokens": 1462911110.0, "step": 7668 }, { "epoch": 2.6175115207373274, "grad_norm": 0.22756088735390823, "learning_rate": 7.075113808801215e-06, "loss": 0.2141, "num_tokens": 1463095872.0, "step": 7669 }, { "epoch": 2.617852875917392, "grad_norm": 0.26612310042199183, "learning_rate": 7.068791097622661e-06, "loss": 0.1962, "num_tokens": 1463239681.0, "step": 7670 }, { "epoch": 2.6181942310974566, "grad_norm": 0.2639705643696271, "learning_rate": 7.062468386444107e-06, "loss": 0.2002, "num_tokens": 1463403215.0, "step": 7671 }, { "epoch": 2.6185355862775217, "grad_norm": 0.2542676986400771, "learning_rate": 7.056145675265555e-06, "loss": 0.2074, "num_tokens": 1463542024.0, "step": 7672 }, { "epoch": 2.6188769414575868, "grad_norm": 0.21429471456371924, "learning_rate": 7.049822964087001e-06, "loss": 0.2181, "num_tokens": 1463777401.0, "step": 7673 }, { "epoch": 2.6192182966376514, "grad_norm": 0.2769289171213444, "learning_rate": 7.043500252908447e-06, "loss": 0.1996, "num_tokens": 1463956199.0, "step": 7674 }, { "epoch": 2.6195596518177164, "grad_norm": 0.26260743033044476, "learning_rate": 7.037177541729895e-06, "loss": 0.238, "num_tokens": 1464126969.0, "step": 7675 }, { "epoch": 2.619901006997781, "grad_norm": 0.22113508155370237, "learning_rate": 7.030854830551341e-06, "loss": 0.2551, "num_tokens": 1464351096.0, "step": 7676 }, { "epoch": 2.620242362177846, "grad_norm": 0.2338815907086673, "learning_rate": 7.024532119372787e-06, "loss": 0.2291, "num_tokens": 1464559809.0, "step": 7677 }, { "epoch": 2.6205837173579107, "grad_norm": 0.24308591627813725, "learning_rate": 7.018209408194234e-06, "loss": 0.2362, "num_tokens": 1464747210.0, "step": 7678 }, { "epoch": 2.620925072537976, "grad_norm": 0.21360026784687777, "learning_rate": 7.01188669701568e-06, "loss": 0.2228, "num_tokens": 1464961402.0, "step": 7679 }, { "epoch": 2.621266427718041, "grad_norm": 0.2665196395570513, "learning_rate": 7.005563985837127e-06, "loss": 0.2247, "num_tokens": 1465149743.0, "step": 7680 }, { "epoch": 2.6216077828981055, "grad_norm": 0.23730167711007372, "learning_rate": 6.999241274658574e-06, "loss": 0.19, "num_tokens": 1465305190.0, "step": 7681 }, { "epoch": 2.62194913807817, "grad_norm": 0.22885754781090178, "learning_rate": 6.9929185634800204e-06, "loss": 0.2096, "num_tokens": 1465480771.0, "step": 7682 }, { "epoch": 2.622290493258235, "grad_norm": 0.20484854591275145, "learning_rate": 6.986595852301467e-06, "loss": 0.2213, "num_tokens": 1465713802.0, "step": 7683 }, { "epoch": 2.6226318484383, "grad_norm": 0.2739629440763664, "learning_rate": 6.980273141122914e-06, "loss": 0.2447, "num_tokens": 1465894392.0, "step": 7684 }, { "epoch": 2.622973203618365, "grad_norm": 0.2663264716773942, "learning_rate": 6.9739504299443606e-06, "loss": 0.2114, "num_tokens": 1466061785.0, "step": 7685 }, { "epoch": 2.62331455879843, "grad_norm": 0.23156733983000294, "learning_rate": 6.967627718765807e-06, "loss": 0.2405, "num_tokens": 1466273767.0, "step": 7686 }, { "epoch": 2.6236559139784945, "grad_norm": 0.23963655739944897, "learning_rate": 6.9613050075872545e-06, "loss": 0.2359, "num_tokens": 1466482215.0, "step": 7687 }, { "epoch": 2.6239972691585596, "grad_norm": 0.21976247264375956, "learning_rate": 6.954982296408701e-06, "loss": 0.2349, "num_tokens": 1466678296.0, "step": 7688 }, { "epoch": 2.624338624338624, "grad_norm": 0.24626818310714946, "learning_rate": 6.948659585230147e-06, "loss": 0.2148, "num_tokens": 1466846163.0, "step": 7689 }, { "epoch": 2.6246799795186893, "grad_norm": 0.27056163207444256, "learning_rate": 6.942336874051595e-06, "loss": 0.2325, "num_tokens": 1467020262.0, "step": 7690 }, { "epoch": 2.6250213346987543, "grad_norm": 0.21999791306540864, "learning_rate": 6.936014162873041e-06, "loss": 0.215, "num_tokens": 1467220619.0, "step": 7691 }, { "epoch": 2.625362689878819, "grad_norm": 0.21896809885400156, "learning_rate": 6.929691451694487e-06, "loss": 0.2061, "num_tokens": 1467446046.0, "step": 7692 }, { "epoch": 2.6257040450588836, "grad_norm": 0.218463714788249, "learning_rate": 6.923368740515934e-06, "loss": 0.2397, "num_tokens": 1467656479.0, "step": 7693 }, { "epoch": 2.6260454002389486, "grad_norm": 0.23019229390819335, "learning_rate": 6.91704602933738e-06, "loss": 0.2152, "num_tokens": 1467847434.0, "step": 7694 }, { "epoch": 2.6263867554190137, "grad_norm": 0.22865666462390524, "learning_rate": 6.910723318158826e-06, "loss": 0.2343, "num_tokens": 1468063709.0, "step": 7695 }, { "epoch": 2.6267281105990783, "grad_norm": 0.24015757568246018, "learning_rate": 6.904400606980274e-06, "loss": 0.2104, "num_tokens": 1468248619.0, "step": 7696 }, { "epoch": 2.6270694657791434, "grad_norm": 0.23674676746194867, "learning_rate": 6.89807789580172e-06, "loss": 0.2195, "num_tokens": 1468428421.0, "step": 7697 }, { "epoch": 2.627410820959208, "grad_norm": 0.24833959218168752, "learning_rate": 6.891755184623166e-06, "loss": 0.2247, "num_tokens": 1468607467.0, "step": 7698 }, { "epoch": 2.627752176139273, "grad_norm": 0.23954813881399106, "learning_rate": 6.885432473444614e-06, "loss": 0.214, "num_tokens": 1468808212.0, "step": 7699 }, { "epoch": 2.6280935313193377, "grad_norm": 0.24629339883764692, "learning_rate": 6.87910976226606e-06, "loss": 0.2059, "num_tokens": 1468975615.0, "step": 7700 }, { "epoch": 2.6284348864994027, "grad_norm": 0.26504743334844116, "learning_rate": 6.872787051087506e-06, "loss": 0.2412, "num_tokens": 1469192088.0, "step": 7701 }, { "epoch": 2.6287762416794673, "grad_norm": 0.21871387393746308, "learning_rate": 6.8664643399089525e-06, "loss": 0.1952, "num_tokens": 1469376345.0, "step": 7702 }, { "epoch": 2.6291175968595324, "grad_norm": 0.21099599043871856, "learning_rate": 6.8601416287304e-06, "loss": 0.2368, "num_tokens": 1469598029.0, "step": 7703 }, { "epoch": 2.629458952039597, "grad_norm": 0.22277387070863242, "learning_rate": 6.8538189175518465e-06, "loss": 0.2348, "num_tokens": 1469802911.0, "step": 7704 }, { "epoch": 2.629800307219662, "grad_norm": 0.19922598211267328, "learning_rate": 6.847496206373293e-06, "loss": 0.211, "num_tokens": 1470022572.0, "step": 7705 }, { "epoch": 2.630141662399727, "grad_norm": 0.24157398137896838, "learning_rate": 6.84117349519474e-06, "loss": 0.2072, "num_tokens": 1470192055.0, "step": 7706 }, { "epoch": 2.6304830175797917, "grad_norm": 0.22385362933982483, "learning_rate": 6.8348507840161866e-06, "loss": 0.2333, "num_tokens": 1470388149.0, "step": 7707 }, { "epoch": 2.6308243727598564, "grad_norm": 0.27272468977675496, "learning_rate": 6.828528072837633e-06, "loss": 0.2386, "num_tokens": 1470573431.0, "step": 7708 }, { "epoch": 2.6311657279399214, "grad_norm": 0.24595832934586953, "learning_rate": 6.82220536165908e-06, "loss": 0.222, "num_tokens": 1470745495.0, "step": 7709 }, { "epoch": 2.6315070831199865, "grad_norm": 0.24683146264532088, "learning_rate": 6.815882650480526e-06, "loss": 0.2348, "num_tokens": 1470941184.0, "step": 7710 }, { "epoch": 2.631848438300051, "grad_norm": 0.2404095401844933, "learning_rate": 6.809559939301973e-06, "loss": 0.208, "num_tokens": 1471117539.0, "step": 7711 }, { "epoch": 2.632189793480116, "grad_norm": 0.2312721884910066, "learning_rate": 6.80323722812342e-06, "loss": 0.2221, "num_tokens": 1471302017.0, "step": 7712 }, { "epoch": 2.632531148660181, "grad_norm": 0.24498050277245223, "learning_rate": 6.796914516944866e-06, "loss": 0.243, "num_tokens": 1471492158.0, "step": 7713 }, { "epoch": 2.632872503840246, "grad_norm": 0.23723000789130377, "learning_rate": 6.790591805766312e-06, "loss": 0.2087, "num_tokens": 1471671196.0, "step": 7714 }, { "epoch": 2.6332138590203105, "grad_norm": 0.24619371016232486, "learning_rate": 6.78426909458776e-06, "loss": 0.2172, "num_tokens": 1471836509.0, "step": 7715 }, { "epoch": 2.6335552142003755, "grad_norm": 0.2569589041584359, "learning_rate": 6.777946383409206e-06, "loss": 0.2065, "num_tokens": 1471991676.0, "step": 7716 }, { "epoch": 2.6338965693804406, "grad_norm": 0.2623329957510893, "learning_rate": 6.771623672230652e-06, "loss": 0.2084, "num_tokens": 1472163647.0, "step": 7717 }, { "epoch": 2.634237924560505, "grad_norm": 0.22097199775204254, "learning_rate": 6.7653009610521e-06, "loss": 0.2134, "num_tokens": 1472366143.0, "step": 7718 }, { "epoch": 2.63457927974057, "grad_norm": 0.23239884717832116, "learning_rate": 6.758978249873546e-06, "loss": 0.2431, "num_tokens": 1472566064.0, "step": 7719 }, { "epoch": 2.634920634920635, "grad_norm": 0.24889859052382446, "learning_rate": 6.752655538694992e-06, "loss": 0.2242, "num_tokens": 1472736185.0, "step": 7720 }, { "epoch": 2.6352619901007, "grad_norm": 0.2316888186808578, "learning_rate": 6.74633282751644e-06, "loss": 0.2447, "num_tokens": 1472940360.0, "step": 7721 }, { "epoch": 2.6356033452807646, "grad_norm": 0.23545653558270338, "learning_rate": 6.740010116337886e-06, "loss": 0.2681, "num_tokens": 1473163431.0, "step": 7722 }, { "epoch": 2.6359447004608296, "grad_norm": 0.23591746976719458, "learning_rate": 6.733687405159332e-06, "loss": 0.2128, "num_tokens": 1473346758.0, "step": 7723 }, { "epoch": 2.6362860556408942, "grad_norm": 0.22320077147591993, "learning_rate": 6.727364693980779e-06, "loss": 0.2344, "num_tokens": 1473562321.0, "step": 7724 }, { "epoch": 2.6366274108209593, "grad_norm": 0.22476531485695875, "learning_rate": 6.7210419828022255e-06, "loss": 0.2381, "num_tokens": 1473758701.0, "step": 7725 }, { "epoch": 2.636968766001024, "grad_norm": 0.24446399125581075, "learning_rate": 6.7147192716236725e-06, "loss": 0.2041, "num_tokens": 1473915201.0, "step": 7726 }, { "epoch": 2.637310121181089, "grad_norm": 0.23794575248694402, "learning_rate": 6.7083965604451194e-06, "loss": 0.2061, "num_tokens": 1474086778.0, "step": 7727 }, { "epoch": 2.637651476361154, "grad_norm": 0.19269224716260813, "learning_rate": 6.7020738492665656e-06, "loss": 0.2289, "num_tokens": 1474352929.0, "step": 7728 }, { "epoch": 2.6379928315412187, "grad_norm": 0.2382990485440568, "learning_rate": 6.695751138088012e-06, "loss": 0.2275, "num_tokens": 1474527134.0, "step": 7729 }, { "epoch": 2.6383341867212833, "grad_norm": 0.22431127286016256, "learning_rate": 6.6894284269094595e-06, "loss": 0.1985, "num_tokens": 1474700898.0, "step": 7730 }, { "epoch": 2.6386755419013483, "grad_norm": 0.21930671672873622, "learning_rate": 6.683105715730906e-06, "loss": 0.2382, "num_tokens": 1474941007.0, "step": 7731 }, { "epoch": 2.6390168970814134, "grad_norm": 0.2546400188997928, "learning_rate": 6.676783004552352e-06, "loss": 0.2243, "num_tokens": 1475114773.0, "step": 7732 }, { "epoch": 2.639358252261478, "grad_norm": 0.25169069347974443, "learning_rate": 6.6704602933738e-06, "loss": 0.2023, "num_tokens": 1475284058.0, "step": 7733 }, { "epoch": 2.639699607441543, "grad_norm": 0.25014335652726893, "learning_rate": 6.664137582195246e-06, "loss": 0.2268, "num_tokens": 1475463427.0, "step": 7734 }, { "epoch": 2.6400409626216077, "grad_norm": 0.25762818502251017, "learning_rate": 6.657814871016692e-06, "loss": 0.2124, "num_tokens": 1475627839.0, "step": 7735 }, { "epoch": 2.6403823178016728, "grad_norm": 0.22325564241370557, "learning_rate": 6.65149215983814e-06, "loss": 0.2407, "num_tokens": 1475845714.0, "step": 7736 }, { "epoch": 2.6407236729817374, "grad_norm": 0.26228268755276346, "learning_rate": 6.645169448659586e-06, "loss": 0.2218, "num_tokens": 1476022884.0, "step": 7737 }, { "epoch": 2.6410650281618024, "grad_norm": 0.2468067454442258, "learning_rate": 6.638846737481032e-06, "loss": 0.2524, "num_tokens": 1476230073.0, "step": 7738 }, { "epoch": 2.641406383341867, "grad_norm": 0.24039923418894313, "learning_rate": 6.632524026302479e-06, "loss": 0.2311, "num_tokens": 1476423083.0, "step": 7739 }, { "epoch": 2.641747738521932, "grad_norm": 0.25054871029660025, "learning_rate": 6.626201315123925e-06, "loss": 0.2455, "num_tokens": 1476611737.0, "step": 7740 }, { "epoch": 2.6420890937019967, "grad_norm": 0.2386952852207009, "learning_rate": 6.619878603945371e-06, "loss": 0.2537, "num_tokens": 1476811184.0, "step": 7741 }, { "epoch": 2.642430448882062, "grad_norm": 0.23141897561273567, "learning_rate": 6.613555892766819e-06, "loss": 0.203, "num_tokens": 1477017133.0, "step": 7742 }, { "epoch": 2.642771804062127, "grad_norm": 0.2414384916345294, "learning_rate": 6.607233181588265e-06, "loss": 0.2111, "num_tokens": 1477193082.0, "step": 7743 }, { "epoch": 2.6431131592421915, "grad_norm": 0.2453405446691464, "learning_rate": 6.600910470409711e-06, "loss": 0.2368, "num_tokens": 1477419785.0, "step": 7744 }, { "epoch": 2.643454514422256, "grad_norm": 0.23913262205298452, "learning_rate": 6.594587759231159e-06, "loss": 0.2416, "num_tokens": 1477617420.0, "step": 7745 }, { "epoch": 2.643795869602321, "grad_norm": 0.24533457017046048, "learning_rate": 6.588265048052605e-06, "loss": 0.2142, "num_tokens": 1477797902.0, "step": 7746 }, { "epoch": 2.644137224782386, "grad_norm": 0.25569234456558754, "learning_rate": 6.5819423368740515e-06, "loss": 0.231, "num_tokens": 1477957623.0, "step": 7747 }, { "epoch": 2.644478579962451, "grad_norm": 0.21923230943685593, "learning_rate": 6.575619625695499e-06, "loss": 0.214, "num_tokens": 1478151339.0, "step": 7748 }, { "epoch": 2.644819935142516, "grad_norm": 0.25561932236677604, "learning_rate": 6.5692969145169454e-06, "loss": 0.2466, "num_tokens": 1478367562.0, "step": 7749 }, { "epoch": 2.6451612903225805, "grad_norm": 0.22324313531712162, "learning_rate": 6.562974203338392e-06, "loss": 0.2225, "num_tokens": 1478564062.0, "step": 7750 }, { "epoch": 2.6455026455026456, "grad_norm": 0.2546279225631828, "learning_rate": 6.556651492159839e-06, "loss": 0.2044, "num_tokens": 1478717425.0, "step": 7751 }, { "epoch": 2.64584400068271, "grad_norm": 0.23684062642060535, "learning_rate": 6.5503287809812855e-06, "loss": 0.2369, "num_tokens": 1478917119.0, "step": 7752 }, { "epoch": 2.6461853558627753, "grad_norm": 0.23607534271025793, "learning_rate": 6.544006069802732e-06, "loss": 0.2396, "num_tokens": 1479128640.0, "step": 7753 }, { "epoch": 2.6465267110428403, "grad_norm": 0.22426056124756572, "learning_rate": 6.537683358624179e-06, "loss": 0.23, "num_tokens": 1479330716.0, "step": 7754 }, { "epoch": 2.646868066222905, "grad_norm": 0.22774254694036244, "learning_rate": 6.531360647445625e-06, "loss": 0.2071, "num_tokens": 1479511564.0, "step": 7755 }, { "epoch": 2.6472094214029696, "grad_norm": 0.23528327184464717, "learning_rate": 6.525037936267071e-06, "loss": 0.2231, "num_tokens": 1479693245.0, "step": 7756 }, { "epoch": 2.6475507765830346, "grad_norm": 0.22283708821977563, "learning_rate": 6.518715225088519e-06, "loss": 0.2235, "num_tokens": 1479898745.0, "step": 7757 }, { "epoch": 2.6478921317630997, "grad_norm": 0.24581042829782807, "learning_rate": 6.512392513909965e-06, "loss": 0.232, "num_tokens": 1480080943.0, "step": 7758 }, { "epoch": 2.6482334869431643, "grad_norm": 0.26251598802915715, "learning_rate": 6.506069802731411e-06, "loss": 0.2167, "num_tokens": 1480247616.0, "step": 7759 }, { "epoch": 2.6485748421232294, "grad_norm": 0.24664132393407615, "learning_rate": 6.499747091552859e-06, "loss": 0.2211, "num_tokens": 1480436562.0, "step": 7760 }, { "epoch": 2.648916197303294, "grad_norm": 0.28650480496591973, "learning_rate": 6.493424380374305e-06, "loss": 0.2245, "num_tokens": 1480600054.0, "step": 7761 }, { "epoch": 2.649257552483359, "grad_norm": 0.24638492605942078, "learning_rate": 6.487101669195751e-06, "loss": 0.2005, "num_tokens": 1480749443.0, "step": 7762 }, { "epoch": 2.6495989076634237, "grad_norm": 0.2691682059075872, "learning_rate": 6.480778958017199e-06, "loss": 0.2351, "num_tokens": 1480916004.0, "step": 7763 }, { "epoch": 2.6499402628434887, "grad_norm": 0.24420187548910047, "learning_rate": 6.474456246838645e-06, "loss": 0.2383, "num_tokens": 1481102766.0, "step": 7764 }, { "epoch": 2.6502816180235538, "grad_norm": 0.23753500801408348, "learning_rate": 6.468133535660091e-06, "loss": 0.2214, "num_tokens": 1481277962.0, "step": 7765 }, { "epoch": 2.6506229732036184, "grad_norm": 0.22832041566454306, "learning_rate": 6.461810824481539e-06, "loss": 0.2255, "num_tokens": 1481461192.0, "step": 7766 }, { "epoch": 2.650964328383683, "grad_norm": 0.23415565765098598, "learning_rate": 6.455488113302985e-06, "loss": 0.2273, "num_tokens": 1481632430.0, "step": 7767 }, { "epoch": 2.651305683563748, "grad_norm": 0.2372829031626328, "learning_rate": 6.449165402124431e-06, "loss": 0.2101, "num_tokens": 1481818726.0, "step": 7768 }, { "epoch": 2.651647038743813, "grad_norm": 0.24041461645040255, "learning_rate": 6.4428426909458775e-06, "loss": 0.209, "num_tokens": 1481990793.0, "step": 7769 }, { "epoch": 2.6519883939238778, "grad_norm": 0.2287049914099528, "learning_rate": 6.4365199797673245e-06, "loss": 0.2438, "num_tokens": 1482210933.0, "step": 7770 }, { "epoch": 2.652329749103943, "grad_norm": 0.20840693026280485, "learning_rate": 6.430197268588771e-06, "loss": 0.2492, "num_tokens": 1482463120.0, "step": 7771 }, { "epoch": 2.6526711042840074, "grad_norm": 0.252420790485925, "learning_rate": 6.423874557410217e-06, "loss": 0.2303, "num_tokens": 1482641838.0, "step": 7772 }, { "epoch": 2.6530124594640725, "grad_norm": 0.22351924566318518, "learning_rate": 6.4175518462316646e-06, "loss": 0.2282, "num_tokens": 1482847719.0, "step": 7773 }, { "epoch": 2.653353814644137, "grad_norm": 0.23906457147792146, "learning_rate": 6.411229135053111e-06, "loss": 0.2398, "num_tokens": 1483037471.0, "step": 7774 }, { "epoch": 2.653695169824202, "grad_norm": 0.22977052632182882, "learning_rate": 6.404906423874557e-06, "loss": 0.2381, "num_tokens": 1483238675.0, "step": 7775 }, { "epoch": 2.654036525004267, "grad_norm": 0.23219998963664698, "learning_rate": 6.398583712696005e-06, "loss": 0.2264, "num_tokens": 1483442025.0, "step": 7776 }, { "epoch": 2.654377880184332, "grad_norm": 0.23623066384207192, "learning_rate": 6.392261001517451e-06, "loss": 0.2221, "num_tokens": 1483633048.0, "step": 7777 }, { "epoch": 2.6547192353643965, "grad_norm": 0.2714819615824706, "learning_rate": 6.385938290338897e-06, "loss": 0.2302, "num_tokens": 1483797503.0, "step": 7778 }, { "epoch": 2.6550605905444615, "grad_norm": 0.19697342266588597, "learning_rate": 6.379615579160345e-06, "loss": 0.2117, "num_tokens": 1484029534.0, "step": 7779 }, { "epoch": 2.6554019457245266, "grad_norm": 0.2292756427741892, "learning_rate": 6.373292867981791e-06, "loss": 0.1976, "num_tokens": 1484208541.0, "step": 7780 }, { "epoch": 2.655743300904591, "grad_norm": 0.24829789356827708, "learning_rate": 6.366970156803237e-06, "loss": 0.2029, "num_tokens": 1484377358.0, "step": 7781 }, { "epoch": 2.656084656084656, "grad_norm": 0.23843846564336782, "learning_rate": 6.360647445624685e-06, "loss": 0.2613, "num_tokens": 1484581530.0, "step": 7782 }, { "epoch": 2.656426011264721, "grad_norm": 0.244197756449978, "learning_rate": 6.354324734446131e-06, "loss": 0.2307, "num_tokens": 1484781937.0, "step": 7783 }, { "epoch": 2.656767366444786, "grad_norm": 0.23622270849091004, "learning_rate": 6.348002023267577e-06, "loss": 0.225, "num_tokens": 1484968270.0, "step": 7784 }, { "epoch": 2.6571087216248506, "grad_norm": 0.2352288792228925, "learning_rate": 6.341679312089024e-06, "loss": 0.2282, "num_tokens": 1485164824.0, "step": 7785 }, { "epoch": 2.6574500768049156, "grad_norm": 0.2666651485558298, "learning_rate": 6.33535660091047e-06, "loss": 0.2318, "num_tokens": 1485318383.0, "step": 7786 }, { "epoch": 2.6577914319849802, "grad_norm": 0.23887183307204768, "learning_rate": 6.329033889731916e-06, "loss": 0.2365, "num_tokens": 1485541231.0, "step": 7787 }, { "epoch": 2.6581327871650453, "grad_norm": 0.22752941960337966, "learning_rate": 6.322711178553364e-06, "loss": 0.202, "num_tokens": 1485712973.0, "step": 7788 }, { "epoch": 2.65847414234511, "grad_norm": 0.2298802507094141, "learning_rate": 6.31638846737481e-06, "loss": 0.2231, "num_tokens": 1485909498.0, "step": 7789 }, { "epoch": 2.658815497525175, "grad_norm": 0.24677444261578874, "learning_rate": 6.3100657561962565e-06, "loss": 0.2143, "num_tokens": 1486077998.0, "step": 7790 }, { "epoch": 2.65915685270524, "grad_norm": 0.2356526960923309, "learning_rate": 6.303743045017704e-06, "loss": 0.2151, "num_tokens": 1486258865.0, "step": 7791 }, { "epoch": 2.6594982078853047, "grad_norm": 0.24143099821023267, "learning_rate": 6.2974203338391505e-06, "loss": 0.2014, "num_tokens": 1486435321.0, "step": 7792 }, { "epoch": 2.6598395630653693, "grad_norm": 0.2221510711200677, "learning_rate": 6.291097622660597e-06, "loss": 0.2311, "num_tokens": 1486623710.0, "step": 7793 }, { "epoch": 2.6601809182454343, "grad_norm": 0.22213097276829055, "learning_rate": 6.2847749114820444e-06, "loss": 0.209, "num_tokens": 1486804688.0, "step": 7794 }, { "epoch": 2.6605222734254994, "grad_norm": 0.23045233109148366, "learning_rate": 6.2784522003034906e-06, "loss": 0.2268, "num_tokens": 1486987139.0, "step": 7795 }, { "epoch": 2.660863628605564, "grad_norm": 0.24000480190086068, "learning_rate": 6.272129489124937e-06, "loss": 0.2314, "num_tokens": 1487178350.0, "step": 7796 }, { "epoch": 2.661204983785629, "grad_norm": 0.22953410793639553, "learning_rate": 6.2658067779463845e-06, "loss": 0.2112, "num_tokens": 1487356987.0, "step": 7797 }, { "epoch": 2.6615463389656937, "grad_norm": 0.23594574488847883, "learning_rate": 6.259484066767831e-06, "loss": 0.2132, "num_tokens": 1487542603.0, "step": 7798 }, { "epoch": 2.6618876941457588, "grad_norm": 0.22147583085534647, "learning_rate": 6.253161355589277e-06, "loss": 0.2292, "num_tokens": 1487753455.0, "step": 7799 }, { "epoch": 2.6622290493258234, "grad_norm": 0.22359631178216263, "learning_rate": 6.246838644410724e-06, "loss": 0.2183, "num_tokens": 1487966354.0, "step": 7800 }, { "epoch": 2.6625704045058884, "grad_norm": 0.21813353177092737, "learning_rate": 6.24051593323217e-06, "loss": 0.2435, "num_tokens": 1488187704.0, "step": 7801 }, { "epoch": 2.6629117596859535, "grad_norm": 0.25069106989076917, "learning_rate": 6.234193222053617e-06, "loss": 0.2252, "num_tokens": 1488369301.0, "step": 7802 }, { "epoch": 2.663253114866018, "grad_norm": 0.24855795619779122, "learning_rate": 6.227870510875063e-06, "loss": 0.2155, "num_tokens": 1488545294.0, "step": 7803 }, { "epoch": 2.6635944700460827, "grad_norm": 0.23000647649224554, "learning_rate": 6.22154779969651e-06, "loss": 0.2565, "num_tokens": 1488788699.0, "step": 7804 }, { "epoch": 2.663935825226148, "grad_norm": 0.24361568539580736, "learning_rate": 6.215225088517957e-06, "loss": 0.2358, "num_tokens": 1488980206.0, "step": 7805 }, { "epoch": 2.664277180406213, "grad_norm": 0.25080763868514483, "learning_rate": 6.208902377339403e-06, "loss": 0.2247, "num_tokens": 1489157371.0, "step": 7806 }, { "epoch": 2.6646185355862775, "grad_norm": 0.25019922273340495, "learning_rate": 6.20257966616085e-06, "loss": 0.2311, "num_tokens": 1489331616.0, "step": 7807 }, { "epoch": 2.6649598907663425, "grad_norm": 0.2265148575278588, "learning_rate": 6.196256954982297e-06, "loss": 0.2382, "num_tokens": 1489523049.0, "step": 7808 }, { "epoch": 2.665301245946407, "grad_norm": 0.23261071479006273, "learning_rate": 6.189934243803743e-06, "loss": 0.2145, "num_tokens": 1489707274.0, "step": 7809 }, { "epoch": 2.665642601126472, "grad_norm": 0.20602118051006418, "learning_rate": 6.18361153262519e-06, "loss": 0.2064, "num_tokens": 1489897119.0, "step": 7810 }, { "epoch": 2.665983956306537, "grad_norm": 0.2597597312941921, "learning_rate": 6.177288821446637e-06, "loss": 0.211, "num_tokens": 1490052363.0, "step": 7811 }, { "epoch": 2.666325311486602, "grad_norm": 0.24077936320649032, "learning_rate": 6.170966110268083e-06, "loss": 0.2117, "num_tokens": 1490227191.0, "step": 7812 }, { "epoch": 2.6666666666666665, "grad_norm": 0.25802671690345214, "learning_rate": 6.16464339908953e-06, "loss": 0.2236, "num_tokens": 1490397275.0, "step": 7813 }, { "epoch": 2.6670080218467316, "grad_norm": 0.22783250978086433, "learning_rate": 6.1583206879109765e-06, "loss": 0.2462, "num_tokens": 1490612864.0, "step": 7814 }, { "epoch": 2.667349377026796, "grad_norm": 0.24850559063262664, "learning_rate": 6.1519979767324235e-06, "loss": 0.2312, "num_tokens": 1490803449.0, "step": 7815 }, { "epoch": 2.6676907322068613, "grad_norm": 0.2334983456058685, "learning_rate": 6.14567526555387e-06, "loss": 0.217, "num_tokens": 1491006872.0, "step": 7816 }, { "epoch": 2.6680320873869263, "grad_norm": 0.22370824333396452, "learning_rate": 6.139352554375316e-06, "loss": 0.2202, "num_tokens": 1491215258.0, "step": 7817 }, { "epoch": 2.668373442566991, "grad_norm": 0.23903105419028212, "learning_rate": 6.133029843196763e-06, "loss": 0.2337, "num_tokens": 1491405396.0, "step": 7818 }, { "epoch": 2.6687147977470556, "grad_norm": 0.236347154906521, "learning_rate": 6.12670713201821e-06, "loss": 0.207, "num_tokens": 1491579288.0, "step": 7819 }, { "epoch": 2.6690561529271206, "grad_norm": 0.26186920435225863, "learning_rate": 6.120384420839656e-06, "loss": 0.2109, "num_tokens": 1491726811.0, "step": 7820 }, { "epoch": 2.6693975081071857, "grad_norm": 0.21919615024853653, "learning_rate": 6.114061709661103e-06, "loss": 0.2285, "num_tokens": 1491931830.0, "step": 7821 }, { "epoch": 2.6697388632872503, "grad_norm": 0.21358723748121392, "learning_rate": 6.10773899848255e-06, "loss": 0.2215, "num_tokens": 1492139257.0, "step": 7822 }, { "epoch": 2.6700802184673154, "grad_norm": 0.24278940831147147, "learning_rate": 6.101416287303996e-06, "loss": 0.2169, "num_tokens": 1492331785.0, "step": 7823 }, { "epoch": 2.67042157364738, "grad_norm": 0.2460957904804229, "learning_rate": 6.095093576125443e-06, "loss": 0.2218, "num_tokens": 1492524667.0, "step": 7824 }, { "epoch": 2.670762928827445, "grad_norm": 0.24071508425376467, "learning_rate": 6.08877086494689e-06, "loss": 0.2251, "num_tokens": 1492706612.0, "step": 7825 }, { "epoch": 2.6711042840075097, "grad_norm": 0.25181594004813085, "learning_rate": 6.082448153768336e-06, "loss": 0.2382, "num_tokens": 1492895019.0, "step": 7826 }, { "epoch": 2.6714456391875747, "grad_norm": 0.21872151600123865, "learning_rate": 6.076125442589783e-06, "loss": 0.2408, "num_tokens": 1493115473.0, "step": 7827 }, { "epoch": 2.67178699436764, "grad_norm": 0.25763417791946824, "learning_rate": 6.06980273141123e-06, "loss": 0.2341, "num_tokens": 1493283093.0, "step": 7828 }, { "epoch": 2.6721283495477044, "grad_norm": 0.23852048022085037, "learning_rate": 6.063480020232676e-06, "loss": 0.2265, "num_tokens": 1493471903.0, "step": 7829 }, { "epoch": 2.672469704727769, "grad_norm": 0.2199909325364293, "learning_rate": 6.057157309054122e-06, "loss": 0.2151, "num_tokens": 1493668801.0, "step": 7830 }, { "epoch": 2.672811059907834, "grad_norm": 0.3088705578801738, "learning_rate": 6.050834597875569e-06, "loss": 0.2009, "num_tokens": 1493849123.0, "step": 7831 }, { "epoch": 2.673152415087899, "grad_norm": 0.23553309959242544, "learning_rate": 6.044511886697015e-06, "loss": 0.2251, "num_tokens": 1494009146.0, "step": 7832 }, { "epoch": 2.6734937702679638, "grad_norm": 0.2650398773732862, "learning_rate": 6.038189175518462e-06, "loss": 0.2377, "num_tokens": 1494154472.0, "step": 7833 }, { "epoch": 2.673835125448029, "grad_norm": 0.2252152315839819, "learning_rate": 6.031866464339909e-06, "loss": 0.2392, "num_tokens": 1494358923.0, "step": 7834 }, { "epoch": 2.6741764806280934, "grad_norm": 0.22893859903602334, "learning_rate": 6.0255437531613555e-06, "loss": 0.2005, "num_tokens": 1494540657.0, "step": 7835 }, { "epoch": 2.6745178358081585, "grad_norm": 0.2528247926431481, "learning_rate": 6.0192210419828025e-06, "loss": 0.2268, "num_tokens": 1494735378.0, "step": 7836 }, { "epoch": 2.674859190988223, "grad_norm": 0.2648574741393229, "learning_rate": 6.0128983308042495e-06, "loss": 0.2335, "num_tokens": 1494909611.0, "step": 7837 }, { "epoch": 2.675200546168288, "grad_norm": 0.2378146887049934, "learning_rate": 6.006575619625696e-06, "loss": 0.2155, "num_tokens": 1495084335.0, "step": 7838 }, { "epoch": 2.6755419013483532, "grad_norm": 0.23860590585333066, "learning_rate": 6.000252908447143e-06, "loss": 0.2027, "num_tokens": 1495269359.0, "step": 7839 }, { "epoch": 2.675883256528418, "grad_norm": 0.2451516532970607, "learning_rate": 5.9939301972685896e-06, "loss": 0.219, "num_tokens": 1495448863.0, "step": 7840 }, { "epoch": 2.6762246117084825, "grad_norm": 0.21537287153228157, "learning_rate": 5.987607486090036e-06, "loss": 0.2404, "num_tokens": 1495674194.0, "step": 7841 }, { "epoch": 2.6765659668885475, "grad_norm": 0.23234958199901445, "learning_rate": 5.981284774911483e-06, "loss": 0.2164, "num_tokens": 1495856678.0, "step": 7842 }, { "epoch": 2.6769073220686126, "grad_norm": 0.25269777267495774, "learning_rate": 5.97496206373293e-06, "loss": 0.2461, "num_tokens": 1496053920.0, "step": 7843 }, { "epoch": 2.677248677248677, "grad_norm": 0.2507497214108851, "learning_rate": 5.968639352554376e-06, "loss": 0.1901, "num_tokens": 1496205735.0, "step": 7844 }, { "epoch": 2.6775900324287423, "grad_norm": 0.27274621957047523, "learning_rate": 5.962316641375822e-06, "loss": 0.2717, "num_tokens": 1496381965.0, "step": 7845 }, { "epoch": 2.677931387608807, "grad_norm": 0.25178093881284347, "learning_rate": 5.955993930197269e-06, "loss": 0.235, "num_tokens": 1496571380.0, "step": 7846 }, { "epoch": 2.678272742788872, "grad_norm": 0.2603678746115344, "learning_rate": 5.949671219018715e-06, "loss": 0.23, "num_tokens": 1496749274.0, "step": 7847 }, { "epoch": 2.6786140979689366, "grad_norm": 0.2323912746993767, "learning_rate": 5.943348507840162e-06, "loss": 0.2225, "num_tokens": 1496952814.0, "step": 7848 }, { "epoch": 2.6789554531490016, "grad_norm": 0.19289414291628623, "learning_rate": 5.937025796661608e-06, "loss": 0.2318, "num_tokens": 1497203050.0, "step": 7849 }, { "epoch": 2.6792968083290662, "grad_norm": 0.24010275625220975, "learning_rate": 5.930703085483055e-06, "loss": 0.2275, "num_tokens": 1497374108.0, "step": 7850 }, { "epoch": 2.6796381635091313, "grad_norm": 0.25875353748217766, "learning_rate": 5.924380374304502e-06, "loss": 0.2238, "num_tokens": 1497516104.0, "step": 7851 }, { "epoch": 2.679979518689196, "grad_norm": 0.219138250620874, "learning_rate": 5.918057663125948e-06, "loss": 0.2335, "num_tokens": 1497760600.0, "step": 7852 }, { "epoch": 2.680320873869261, "grad_norm": 0.22376808649011437, "learning_rate": 5.911734951947395e-06, "loss": 0.2311, "num_tokens": 1497962364.0, "step": 7853 }, { "epoch": 2.680662229049326, "grad_norm": 0.2601260960246331, "learning_rate": 5.905412240768842e-06, "loss": 0.2554, "num_tokens": 1498157648.0, "step": 7854 }, { "epoch": 2.6810035842293907, "grad_norm": 0.20262611900294764, "learning_rate": 5.899089529590288e-06, "loss": 0.2469, "num_tokens": 1498396598.0, "step": 7855 }, { "epoch": 2.6813449394094553, "grad_norm": 0.21592419484206685, "learning_rate": 5.892766818411735e-06, "loss": 0.2105, "num_tokens": 1498595825.0, "step": 7856 }, { "epoch": 2.6816862945895203, "grad_norm": 0.22779802015099698, "learning_rate": 5.886444107233182e-06, "loss": 0.2328, "num_tokens": 1498793051.0, "step": 7857 }, { "epoch": 2.6820276497695854, "grad_norm": 0.24699543117085018, "learning_rate": 5.8801213960546285e-06, "loss": 0.25, "num_tokens": 1499021114.0, "step": 7858 }, { "epoch": 2.68236900494965, "grad_norm": 0.24261116341033295, "learning_rate": 5.8737986848760755e-06, "loss": 0.2156, "num_tokens": 1499185985.0, "step": 7859 }, { "epoch": 2.682710360129715, "grad_norm": 0.22227125838743506, "learning_rate": 5.867475973697522e-06, "loss": 0.2139, "num_tokens": 1499376799.0, "step": 7860 }, { "epoch": 2.6830517153097797, "grad_norm": 0.25397132175200665, "learning_rate": 5.861153262518968e-06, "loss": 0.2132, "num_tokens": 1499539131.0, "step": 7861 }, { "epoch": 2.6833930704898448, "grad_norm": 0.23457532353317587, "learning_rate": 5.854830551340415e-06, "loss": 0.2166, "num_tokens": 1499701921.0, "step": 7862 }, { "epoch": 2.6837344256699094, "grad_norm": 0.27081044779181257, "learning_rate": 5.848507840161862e-06, "loss": 0.2277, "num_tokens": 1499867619.0, "step": 7863 }, { "epoch": 2.6840757808499744, "grad_norm": 0.23228339087641453, "learning_rate": 5.842185128983308e-06, "loss": 0.2186, "num_tokens": 1500049126.0, "step": 7864 }, { "epoch": 2.6844171360300395, "grad_norm": 0.21136167519736998, "learning_rate": 5.835862417804755e-06, "loss": 0.2426, "num_tokens": 1500284819.0, "step": 7865 }, { "epoch": 2.684758491210104, "grad_norm": 0.2241062613814082, "learning_rate": 5.829539706626202e-06, "loss": 0.2312, "num_tokens": 1500488196.0, "step": 7866 }, { "epoch": 2.6850998463901687, "grad_norm": 0.2150094012903945, "learning_rate": 5.823216995447648e-06, "loss": 0.2426, "num_tokens": 1500703626.0, "step": 7867 }, { "epoch": 2.685441201570234, "grad_norm": 0.2534078945461661, "learning_rate": 5.816894284269095e-06, "loss": 0.2293, "num_tokens": 1500870449.0, "step": 7868 }, { "epoch": 2.685782556750299, "grad_norm": 0.23375585037172664, "learning_rate": 5.810571573090542e-06, "loss": 0.2516, "num_tokens": 1501076063.0, "step": 7869 }, { "epoch": 2.6861239119303635, "grad_norm": 0.22772770899527894, "learning_rate": 5.804248861911988e-06, "loss": 0.2222, "num_tokens": 1501291730.0, "step": 7870 }, { "epoch": 2.6864652671104285, "grad_norm": 0.23310996716225552, "learning_rate": 5.797926150733435e-06, "loss": 0.2164, "num_tokens": 1501480674.0, "step": 7871 }, { "epoch": 2.686806622290493, "grad_norm": 0.2153090618827544, "learning_rate": 5.791603439554882e-06, "loss": 0.239, "num_tokens": 1501715532.0, "step": 7872 }, { "epoch": 2.6871479774705582, "grad_norm": 0.25021944822701375, "learning_rate": 5.785280728376328e-06, "loss": 0.2191, "num_tokens": 1501881196.0, "step": 7873 }, { "epoch": 2.687489332650623, "grad_norm": 0.23830982146942611, "learning_rate": 5.778958017197775e-06, "loss": 0.2283, "num_tokens": 1502061491.0, "step": 7874 }, { "epoch": 2.687830687830688, "grad_norm": 0.24142978272373447, "learning_rate": 5.772635306019221e-06, "loss": 0.2117, "num_tokens": 1502238582.0, "step": 7875 }, { "epoch": 2.688172043010753, "grad_norm": 0.2433617306605645, "learning_rate": 5.766312594840667e-06, "loss": 0.2235, "num_tokens": 1502415192.0, "step": 7876 }, { "epoch": 2.6885133981908176, "grad_norm": 0.24398466957321543, "learning_rate": 5.759989883662114e-06, "loss": 0.1928, "num_tokens": 1502560224.0, "step": 7877 }, { "epoch": 2.688854753370882, "grad_norm": 0.269820607442147, "learning_rate": 5.753667172483561e-06, "loss": 0.2223, "num_tokens": 1502706057.0, "step": 7878 }, { "epoch": 2.6891961085509473, "grad_norm": 0.2472361787808093, "learning_rate": 5.7473444613050075e-06, "loss": 0.2341, "num_tokens": 1502897306.0, "step": 7879 }, { "epoch": 2.6895374637310123, "grad_norm": 0.28440778589480975, "learning_rate": 5.7410217501264545e-06, "loss": 0.236, "num_tokens": 1503064726.0, "step": 7880 }, { "epoch": 2.689878818911077, "grad_norm": 0.21899185637741259, "learning_rate": 5.7346990389479015e-06, "loss": 0.2489, "num_tokens": 1503281314.0, "step": 7881 }, { "epoch": 2.6902201740911416, "grad_norm": 0.23525090216551978, "learning_rate": 5.728376327769348e-06, "loss": 0.2138, "num_tokens": 1503462669.0, "step": 7882 }, { "epoch": 2.6905615292712066, "grad_norm": 0.2823718523477825, "learning_rate": 5.722053616590795e-06, "loss": 0.2222, "num_tokens": 1503613175.0, "step": 7883 }, { "epoch": 2.6909028844512717, "grad_norm": 0.24909028149866305, "learning_rate": 5.715730905412241e-06, "loss": 0.2215, "num_tokens": 1503803359.0, "step": 7884 }, { "epoch": 2.6912442396313363, "grad_norm": 0.24935903053560646, "learning_rate": 5.709408194233688e-06, "loss": 0.1952, "num_tokens": 1503947251.0, "step": 7885 }, { "epoch": 2.6915855948114014, "grad_norm": 0.26604521358054817, "learning_rate": 5.703085483055135e-06, "loss": 0.2032, "num_tokens": 1504081643.0, "step": 7886 }, { "epoch": 2.691926949991466, "grad_norm": 0.253386009270942, "learning_rate": 5.696762771876581e-06, "loss": 0.2, "num_tokens": 1504238261.0, "step": 7887 }, { "epoch": 2.692268305171531, "grad_norm": 0.22248728824828498, "learning_rate": 5.690440060698028e-06, "loss": 0.2089, "num_tokens": 1504448072.0, "step": 7888 }, { "epoch": 2.6926096603515957, "grad_norm": 0.22123335801898453, "learning_rate": 5.684117349519475e-06, "loss": 0.22, "num_tokens": 1504657159.0, "step": 7889 }, { "epoch": 2.6929510155316607, "grad_norm": 0.2274485515554079, "learning_rate": 5.677794638340921e-06, "loss": 0.2272, "num_tokens": 1504858563.0, "step": 7890 }, { "epoch": 2.693292370711726, "grad_norm": 0.274029138888232, "learning_rate": 5.671471927162367e-06, "loss": 0.2422, "num_tokens": 1505034140.0, "step": 7891 }, { "epoch": 2.6936337258917904, "grad_norm": 0.261310280392417, "learning_rate": 5.665149215983814e-06, "loss": 0.2479, "num_tokens": 1505217794.0, "step": 7892 }, { "epoch": 2.693975081071855, "grad_norm": 0.22291610093917585, "learning_rate": 5.65882650480526e-06, "loss": 0.2333, "num_tokens": 1505423152.0, "step": 7893 }, { "epoch": 2.69431643625192, "grad_norm": 0.21844267150095573, "learning_rate": 5.652503793626707e-06, "loss": 0.217, "num_tokens": 1505621534.0, "step": 7894 }, { "epoch": 2.694657791431985, "grad_norm": 0.23189489870923444, "learning_rate": 5.646181082448154e-06, "loss": 0.234, "num_tokens": 1505822011.0, "step": 7895 }, { "epoch": 2.6949991466120498, "grad_norm": 0.20799490666817835, "learning_rate": 5.6398583712696e-06, "loss": 0.2152, "num_tokens": 1506031481.0, "step": 7896 }, { "epoch": 2.695340501792115, "grad_norm": 0.24821833885332373, "learning_rate": 5.633535660091047e-06, "loss": 0.2188, "num_tokens": 1506198775.0, "step": 7897 }, { "epoch": 2.6956818569721794, "grad_norm": 0.204964435502956, "learning_rate": 5.627212948912494e-06, "loss": 0.2258, "num_tokens": 1506415055.0, "step": 7898 }, { "epoch": 2.6960232121522445, "grad_norm": 0.24204393065316265, "learning_rate": 5.62089023773394e-06, "loss": 0.2019, "num_tokens": 1506567451.0, "step": 7899 }, { "epoch": 2.696364567332309, "grad_norm": 0.2530602382008575, "learning_rate": 5.614567526555387e-06, "loss": 0.2015, "num_tokens": 1506728227.0, "step": 7900 }, { "epoch": 2.696705922512374, "grad_norm": 0.21002649620634534, "learning_rate": 5.608244815376834e-06, "loss": 0.2287, "num_tokens": 1506954603.0, "step": 7901 }, { "epoch": 2.6970472776924392, "grad_norm": 0.24783834637420749, "learning_rate": 5.6019221041982805e-06, "loss": 0.2108, "num_tokens": 1507110884.0, "step": 7902 }, { "epoch": 2.697388632872504, "grad_norm": 0.23676416849213813, "learning_rate": 5.5955993930197275e-06, "loss": 0.2205, "num_tokens": 1507283080.0, "step": 7903 }, { "epoch": 2.6977299880525685, "grad_norm": 0.22481560173662266, "learning_rate": 5.5892766818411745e-06, "loss": 0.2134, "num_tokens": 1507477320.0, "step": 7904 }, { "epoch": 2.6980713432326335, "grad_norm": 0.25609200955769934, "learning_rate": 5.582953970662621e-06, "loss": 0.2436, "num_tokens": 1507653990.0, "step": 7905 }, { "epoch": 2.6984126984126986, "grad_norm": 0.2364643110482449, "learning_rate": 5.576631259484067e-06, "loss": 0.2467, "num_tokens": 1507871011.0, "step": 7906 }, { "epoch": 2.698754053592763, "grad_norm": 0.23203576401057446, "learning_rate": 5.570308548305514e-06, "loss": 0.2475, "num_tokens": 1508080878.0, "step": 7907 }, { "epoch": 2.6990954087728283, "grad_norm": 0.22525057861268138, "learning_rate": 5.56398583712696e-06, "loss": 0.1966, "num_tokens": 1508266194.0, "step": 7908 }, { "epoch": 2.699436763952893, "grad_norm": 0.249113918461448, "learning_rate": 5.557663125948407e-06, "loss": 0.239, "num_tokens": 1508461476.0, "step": 7909 }, { "epoch": 2.699778119132958, "grad_norm": 0.23424308506108243, "learning_rate": 5.551340414769854e-06, "loss": 0.2307, "num_tokens": 1508647387.0, "step": 7910 }, { "epoch": 2.7001194743130226, "grad_norm": 0.23351223422204961, "learning_rate": 5.5450177035913e-06, "loss": 0.219, "num_tokens": 1508819142.0, "step": 7911 }, { "epoch": 2.7004608294930876, "grad_norm": 0.23068276270220514, "learning_rate": 5.538694992412747e-06, "loss": 0.202, "num_tokens": 1508991058.0, "step": 7912 }, { "epoch": 2.7008021846731523, "grad_norm": 0.2755580035764206, "learning_rate": 5.532372281234194e-06, "loss": 0.2162, "num_tokens": 1509144589.0, "step": 7913 }, { "epoch": 2.7011435398532173, "grad_norm": 0.22864378875699062, "learning_rate": 5.52604957005564e-06, "loss": 0.2132, "num_tokens": 1509337457.0, "step": 7914 }, { "epoch": 2.701484895033282, "grad_norm": 0.21418016560809144, "learning_rate": 5.519726858877087e-06, "loss": 0.2562, "num_tokens": 1509580325.0, "step": 7915 }, { "epoch": 2.701826250213347, "grad_norm": 0.26517181108855764, "learning_rate": 5.513404147698533e-06, "loss": 0.2521, "num_tokens": 1509744940.0, "step": 7916 }, { "epoch": 2.702167605393412, "grad_norm": 0.21277677709335674, "learning_rate": 5.50708143651998e-06, "loss": 0.2168, "num_tokens": 1509931426.0, "step": 7917 }, { "epoch": 2.7025089605734767, "grad_norm": 0.21858045671870788, "learning_rate": 5.500758725341427e-06, "loss": 0.2295, "num_tokens": 1510143665.0, "step": 7918 }, { "epoch": 2.7028503157535413, "grad_norm": 0.22898709043802706, "learning_rate": 5.494436014162873e-06, "loss": 0.2066, "num_tokens": 1510323636.0, "step": 7919 }, { "epoch": 2.7031916709336063, "grad_norm": 0.20275499839576164, "learning_rate": 5.48811330298432e-06, "loss": 0.2338, "num_tokens": 1510564232.0, "step": 7920 }, { "epoch": 2.7035330261136714, "grad_norm": 0.2229843190376849, "learning_rate": 5.481790591805766e-06, "loss": 0.2245, "num_tokens": 1510766133.0, "step": 7921 }, { "epoch": 2.703874381293736, "grad_norm": 0.2514796372320386, "learning_rate": 5.4754678806272125e-06, "loss": 0.2245, "num_tokens": 1510931466.0, "step": 7922 }, { "epoch": 2.704215736473801, "grad_norm": 0.214630687565626, "learning_rate": 5.4691451694486595e-06, "loss": 0.2053, "num_tokens": 1511122767.0, "step": 7923 }, { "epoch": 2.7045570916538657, "grad_norm": 0.22415114769629188, "learning_rate": 5.4628224582701065e-06, "loss": 0.2129, "num_tokens": 1511306330.0, "step": 7924 }, { "epoch": 2.7048984468339308, "grad_norm": 0.2420297153951012, "learning_rate": 5.456499747091553e-06, "loss": 0.2267, "num_tokens": 1511531007.0, "step": 7925 }, { "epoch": 2.7052398020139954, "grad_norm": 0.23877835906983805, "learning_rate": 5.450177035913e-06, "loss": 0.2382, "num_tokens": 1511746807.0, "step": 7926 }, { "epoch": 2.7055811571940604, "grad_norm": 0.22226563941150382, "learning_rate": 5.443854324734447e-06, "loss": 0.2321, "num_tokens": 1511962037.0, "step": 7927 }, { "epoch": 2.7059225123741255, "grad_norm": 0.21966066996472586, "learning_rate": 5.437531613555893e-06, "loss": 0.2547, "num_tokens": 1512197657.0, "step": 7928 }, { "epoch": 2.70626386755419, "grad_norm": 0.2186134177769229, "learning_rate": 5.43120890237734e-06, "loss": 0.2144, "num_tokens": 1512403253.0, "step": 7929 }, { "epoch": 2.7066052227342547, "grad_norm": 0.22164215025441425, "learning_rate": 5.424886191198787e-06, "loss": 0.2114, "num_tokens": 1512594993.0, "step": 7930 }, { "epoch": 2.70694657791432, "grad_norm": 0.27173111004884365, "learning_rate": 5.418563480020233e-06, "loss": 0.2331, "num_tokens": 1512736666.0, "step": 7931 }, { "epoch": 2.707287933094385, "grad_norm": 0.24553489905481565, "learning_rate": 5.41224076884168e-06, "loss": 0.2472, "num_tokens": 1512927103.0, "step": 7932 }, { "epoch": 2.7076292882744495, "grad_norm": 0.24452346002916728, "learning_rate": 5.405918057663127e-06, "loss": 0.2435, "num_tokens": 1513120604.0, "step": 7933 }, { "epoch": 2.7079706434545145, "grad_norm": 0.22771127463276478, "learning_rate": 5.399595346484573e-06, "loss": 0.2225, "num_tokens": 1513315468.0, "step": 7934 }, { "epoch": 2.708311998634579, "grad_norm": 0.24940824171630024, "learning_rate": 5.39327263530602e-06, "loss": 0.2105, "num_tokens": 1513485934.0, "step": 7935 }, { "epoch": 2.7086533538146442, "grad_norm": 0.28558766961925786, "learning_rate": 5.386949924127466e-06, "loss": 0.2307, "num_tokens": 1513705530.0, "step": 7936 }, { "epoch": 2.708994708994709, "grad_norm": 0.2159975774377921, "learning_rate": 5.380627212948912e-06, "loss": 0.2339, "num_tokens": 1513915695.0, "step": 7937 }, { "epoch": 2.709336064174774, "grad_norm": 0.22353706183921002, "learning_rate": 5.374304501770359e-06, "loss": 0.1893, "num_tokens": 1514097240.0, "step": 7938 }, { "epoch": 2.709677419354839, "grad_norm": 0.2424303353801516, "learning_rate": 5.367981790591806e-06, "loss": 0.2054, "num_tokens": 1514259708.0, "step": 7939 }, { "epoch": 2.7100187745349036, "grad_norm": 0.2328679703710786, "learning_rate": 5.361659079413252e-06, "loss": 0.2253, "num_tokens": 1514439278.0, "step": 7940 }, { "epoch": 2.710360129714968, "grad_norm": 0.22997246426923584, "learning_rate": 5.355336368234699e-06, "loss": 0.2102, "num_tokens": 1514641790.0, "step": 7941 }, { "epoch": 2.7107014848950333, "grad_norm": 0.24070077697193695, "learning_rate": 5.349013657056146e-06, "loss": 0.2368, "num_tokens": 1514836552.0, "step": 7942 }, { "epoch": 2.7110428400750983, "grad_norm": 0.254118277554467, "learning_rate": 5.342690945877592e-06, "loss": 0.2269, "num_tokens": 1515029902.0, "step": 7943 }, { "epoch": 2.711384195255163, "grad_norm": 0.2455528188346371, "learning_rate": 5.336368234699039e-06, "loss": 0.2102, "num_tokens": 1515218204.0, "step": 7944 }, { "epoch": 2.711725550435228, "grad_norm": 0.24827325035411776, "learning_rate": 5.330045523520486e-06, "loss": 0.223, "num_tokens": 1515409305.0, "step": 7945 }, { "epoch": 2.7120669056152926, "grad_norm": 0.20027409395877246, "learning_rate": 5.3237228123419325e-06, "loss": 0.2045, "num_tokens": 1515625941.0, "step": 7946 }, { "epoch": 2.7124082607953577, "grad_norm": 0.25529560718349015, "learning_rate": 5.3174001011633795e-06, "loss": 0.2007, "num_tokens": 1515773780.0, "step": 7947 }, { "epoch": 2.7127496159754223, "grad_norm": 0.23295275882898106, "learning_rate": 5.311077389984826e-06, "loss": 0.245, "num_tokens": 1515976491.0, "step": 7948 }, { "epoch": 2.7130909711554874, "grad_norm": 0.2521440706952279, "learning_rate": 5.304754678806273e-06, "loss": 0.2473, "num_tokens": 1516165757.0, "step": 7949 }, { "epoch": 2.713432326335552, "grad_norm": 0.2803948503427295, "learning_rate": 5.298431967627719e-06, "loss": 0.2168, "num_tokens": 1516393663.0, "step": 7950 }, { "epoch": 2.713773681515617, "grad_norm": 0.2275925409905139, "learning_rate": 5.292109256449166e-06, "loss": 0.221, "num_tokens": 1516583495.0, "step": 7951 }, { "epoch": 2.7141150366956817, "grad_norm": 0.23505060901604788, "learning_rate": 5.285786545270612e-06, "loss": 0.2287, "num_tokens": 1516778225.0, "step": 7952 }, { "epoch": 2.7144563918757467, "grad_norm": 0.23244967565319619, "learning_rate": 5.279463834092059e-06, "loss": 0.242, "num_tokens": 1517000103.0, "step": 7953 }, { "epoch": 2.714797747055812, "grad_norm": 0.23033960583336843, "learning_rate": 5.273141122913505e-06, "loss": 0.2253, "num_tokens": 1517199653.0, "step": 7954 }, { "epoch": 2.7151391022358764, "grad_norm": 0.22262307278224144, "learning_rate": 5.266818411734952e-06, "loss": 0.2257, "num_tokens": 1517400247.0, "step": 7955 }, { "epoch": 2.715480457415941, "grad_norm": 0.21209981669582534, "learning_rate": 5.260495700556399e-06, "loss": 0.1977, "num_tokens": 1517594467.0, "step": 7956 }, { "epoch": 2.715821812596006, "grad_norm": 0.25252181347491226, "learning_rate": 5.254172989377845e-06, "loss": 0.2153, "num_tokens": 1517777227.0, "step": 7957 }, { "epoch": 2.716163167776071, "grad_norm": 0.2352241559262148, "learning_rate": 5.247850278199292e-06, "loss": 0.2046, "num_tokens": 1517931934.0, "step": 7958 }, { "epoch": 2.7165045229561358, "grad_norm": 0.23834439372879726, "learning_rate": 5.241527567020739e-06, "loss": 0.2317, "num_tokens": 1518133968.0, "step": 7959 }, { "epoch": 2.716845878136201, "grad_norm": 0.25275345568235397, "learning_rate": 5.235204855842185e-06, "loss": 0.2255, "num_tokens": 1518313557.0, "step": 7960 }, { "epoch": 2.7171872333162654, "grad_norm": 0.2391355646885284, "learning_rate": 5.228882144663632e-06, "loss": 0.2417, "num_tokens": 1518507150.0, "step": 7961 }, { "epoch": 2.7175285884963305, "grad_norm": 0.21310804107924974, "learning_rate": 5.222559433485079e-06, "loss": 0.2133, "num_tokens": 1518732761.0, "step": 7962 }, { "epoch": 2.717869943676395, "grad_norm": 0.2412045979015483, "learning_rate": 5.216236722306525e-06, "loss": 0.2153, "num_tokens": 1518919356.0, "step": 7963 }, { "epoch": 2.71821129885646, "grad_norm": 0.2519622511771961, "learning_rate": 5.209914011127972e-06, "loss": 0.2399, "num_tokens": 1519110522.0, "step": 7964 }, { "epoch": 2.7185526540365252, "grad_norm": 0.24110308305601905, "learning_rate": 5.203591299949418e-06, "loss": 0.225, "num_tokens": 1519280419.0, "step": 7965 }, { "epoch": 2.71889400921659, "grad_norm": 0.21964647523565725, "learning_rate": 5.197268588770865e-06, "loss": 0.2322, "num_tokens": 1519485338.0, "step": 7966 }, { "epoch": 2.7192353643966545, "grad_norm": 0.26993224337552074, "learning_rate": 5.1909458775923115e-06, "loss": 0.2311, "num_tokens": 1519668521.0, "step": 7967 }, { "epoch": 2.7195767195767195, "grad_norm": 0.21367083278540616, "learning_rate": 5.1846231664137585e-06, "loss": 0.2218, "num_tokens": 1519885470.0, "step": 7968 }, { "epoch": 2.7199180747567846, "grad_norm": 0.22783676562793476, "learning_rate": 5.178300455235205e-06, "loss": 0.2271, "num_tokens": 1520088888.0, "step": 7969 }, { "epoch": 2.720259429936849, "grad_norm": 0.2417806590766736, "learning_rate": 5.171977744056652e-06, "loss": 0.22, "num_tokens": 1520253921.0, "step": 7970 }, { "epoch": 2.7206007851169143, "grad_norm": 0.259620152059068, "learning_rate": 5.165655032878099e-06, "loss": 0.2511, "num_tokens": 1520451858.0, "step": 7971 }, { "epoch": 2.720942140296979, "grad_norm": 0.264317887348793, "learning_rate": 5.159332321699545e-06, "loss": 0.2069, "num_tokens": 1520612797.0, "step": 7972 }, { "epoch": 2.721283495477044, "grad_norm": 0.2300137938861957, "learning_rate": 5.153009610520992e-06, "loss": 0.2238, "num_tokens": 1520799185.0, "step": 7973 }, { "epoch": 2.7216248506571086, "grad_norm": 0.23321214438763732, "learning_rate": 5.146686899342439e-06, "loss": 0.2096, "num_tokens": 1520977092.0, "step": 7974 }, { "epoch": 2.7219662058371736, "grad_norm": 0.23004592391577655, "learning_rate": 5.140364188163885e-06, "loss": 0.2135, "num_tokens": 1521186774.0, "step": 7975 }, { "epoch": 2.7223075610172387, "grad_norm": 0.24701726291676607, "learning_rate": 5.134041476985332e-06, "loss": 0.2322, "num_tokens": 1521373260.0, "step": 7976 }, { "epoch": 2.7226489161973033, "grad_norm": 0.22643767903383327, "learning_rate": 5.127718765806779e-06, "loss": 0.2469, "num_tokens": 1521581608.0, "step": 7977 }, { "epoch": 2.722990271377368, "grad_norm": 0.23889821617270682, "learning_rate": 5.121396054628225e-06, "loss": 0.201, "num_tokens": 1521748920.0, "step": 7978 }, { "epoch": 2.723331626557433, "grad_norm": 0.21735903373721524, "learning_rate": 5.115073343449672e-06, "loss": 0.2268, "num_tokens": 1521950819.0, "step": 7979 }, { "epoch": 2.723672981737498, "grad_norm": 0.22613167279899096, "learning_rate": 5.108750632271118e-06, "loss": 0.243, "num_tokens": 1522185537.0, "step": 7980 }, { "epoch": 2.7240143369175627, "grad_norm": 0.24298673256459993, "learning_rate": 5.102427921092565e-06, "loss": 0.2296, "num_tokens": 1522368136.0, "step": 7981 }, { "epoch": 2.7243556920976277, "grad_norm": 0.25298854771333434, "learning_rate": 5.096105209914011e-06, "loss": 0.2204, "num_tokens": 1522531588.0, "step": 7982 }, { "epoch": 2.7246970472776924, "grad_norm": 0.21400253021544932, "learning_rate": 5.089782498735457e-06, "loss": 0.2014, "num_tokens": 1522717515.0, "step": 7983 }, { "epoch": 2.7250384024577574, "grad_norm": 0.27194738364353643, "learning_rate": 5.083459787556904e-06, "loss": 0.2118, "num_tokens": 1522849494.0, "step": 7984 }, { "epoch": 2.725379757637822, "grad_norm": 0.22420298801768734, "learning_rate": 5.077137076378351e-06, "loss": 0.2226, "num_tokens": 1523057249.0, "step": 7985 }, { "epoch": 2.725721112817887, "grad_norm": 0.2341069684554765, "learning_rate": 5.070814365199797e-06, "loss": 0.2132, "num_tokens": 1523257583.0, "step": 7986 }, { "epoch": 2.7260624679979517, "grad_norm": 0.23389071116637863, "learning_rate": 5.064491654021244e-06, "loss": 0.2305, "num_tokens": 1523444590.0, "step": 7987 }, { "epoch": 2.7264038231780168, "grad_norm": 0.260969741374422, "learning_rate": 5.058168942842691e-06, "loss": 0.1742, "num_tokens": 1523567931.0, "step": 7988 }, { "epoch": 2.7267451783580814, "grad_norm": 0.23568107650588846, "learning_rate": 5.0518462316641375e-06, "loss": 0.253, "num_tokens": 1523775760.0, "step": 7989 }, { "epoch": 2.7270865335381465, "grad_norm": 0.2426604383538978, "learning_rate": 5.0455235204855845e-06, "loss": 0.2315, "num_tokens": 1523964694.0, "step": 7990 }, { "epoch": 2.7274278887182115, "grad_norm": 0.2441025986398286, "learning_rate": 5.0392008093070315e-06, "loss": 0.2234, "num_tokens": 1524128866.0, "step": 7991 }, { "epoch": 2.727769243898276, "grad_norm": 0.2523567396025027, "learning_rate": 5.032878098128478e-06, "loss": 0.2121, "num_tokens": 1524297899.0, "step": 7992 }, { "epoch": 2.7281105990783407, "grad_norm": 0.21694600037595405, "learning_rate": 5.026555386949925e-06, "loss": 0.2303, "num_tokens": 1524523934.0, "step": 7993 }, { "epoch": 2.728451954258406, "grad_norm": 0.24852552298766398, "learning_rate": 5.020232675771372e-06, "loss": 0.2401, "num_tokens": 1524699016.0, "step": 7994 }, { "epoch": 2.728793309438471, "grad_norm": 0.226139610527524, "learning_rate": 5.013909964592818e-06, "loss": 0.2629, "num_tokens": 1524945388.0, "step": 7995 }, { "epoch": 2.7291346646185355, "grad_norm": 0.23849749147482294, "learning_rate": 5.007587253414264e-06, "loss": 0.2244, "num_tokens": 1525126929.0, "step": 7996 }, { "epoch": 2.7294760197986006, "grad_norm": 0.24766787320849423, "learning_rate": 5.001264542235711e-06, "loss": 0.2353, "num_tokens": 1525308090.0, "step": 7997 }, { "epoch": 2.729817374978665, "grad_norm": 0.22192623400951222, "learning_rate": 4.994941831057157e-06, "loss": 0.2174, "num_tokens": 1525513957.0, "step": 7998 }, { "epoch": 2.7301587301587302, "grad_norm": 0.24719742768529834, "learning_rate": 4.988619119878604e-06, "loss": 0.2205, "num_tokens": 1525709949.0, "step": 7999 }, { "epoch": 2.730500085338795, "grad_norm": 28.281663925140847, "learning_rate": 4.982296408700051e-06, "loss": 0.3136, "num_tokens": 1525889895.0, "step": 8000 }, { "epoch": 2.73084144051886, "grad_norm": 0.2676191249043078, "learning_rate": 4.975973697521497e-06, "loss": 0.2233, "num_tokens": 1526055417.0, "step": 8001 }, { "epoch": 2.731182795698925, "grad_norm": 0.2443007176644355, "learning_rate": 4.969650986342944e-06, "loss": 0.2203, "num_tokens": 1526223577.0, "step": 8002 }, { "epoch": 2.7315241508789896, "grad_norm": 0.2614983786624466, "learning_rate": 4.963328275164391e-06, "loss": 0.2234, "num_tokens": 1526414792.0, "step": 8003 }, { "epoch": 2.731865506059054, "grad_norm": 0.2321255488635939, "learning_rate": 4.957005563985837e-06, "loss": 0.242, "num_tokens": 1526608501.0, "step": 8004 }, { "epoch": 2.7322068612391193, "grad_norm": 0.24058341603492645, "learning_rate": 4.950682852807284e-06, "loss": 0.2251, "num_tokens": 1526805503.0, "step": 8005 }, { "epoch": 2.7325482164191843, "grad_norm": 0.23602468443419497, "learning_rate": 4.944360141628731e-06, "loss": 0.2225, "num_tokens": 1526996694.0, "step": 8006 }, { "epoch": 2.732889571599249, "grad_norm": 0.25317153501664974, "learning_rate": 4.938037430450177e-06, "loss": 0.213, "num_tokens": 1527165153.0, "step": 8007 }, { "epoch": 2.733230926779314, "grad_norm": 0.25243630266808104, "learning_rate": 4.931714719271624e-06, "loss": 0.2179, "num_tokens": 1527344045.0, "step": 8008 }, { "epoch": 2.7335722819593786, "grad_norm": 0.2133639997286995, "learning_rate": 4.925392008093071e-06, "loss": 0.2245, "num_tokens": 1527567373.0, "step": 8009 }, { "epoch": 2.7339136371394437, "grad_norm": 0.26329734630045337, "learning_rate": 4.919069296914517e-06, "loss": 0.2631, "num_tokens": 1527763193.0, "step": 8010 }, { "epoch": 2.7342549923195083, "grad_norm": 0.22503087383433015, "learning_rate": 4.9127465857359635e-06, "loss": 0.2526, "num_tokens": 1528020134.0, "step": 8011 }, { "epoch": 2.7345963474995734, "grad_norm": 0.2265362950381058, "learning_rate": 4.9064238745574105e-06, "loss": 0.2042, "num_tokens": 1528197695.0, "step": 8012 }, { "epoch": 2.7349377026796384, "grad_norm": 0.231394376460962, "learning_rate": 4.900101163378857e-06, "loss": 0.2327, "num_tokens": 1528408211.0, "step": 8013 }, { "epoch": 2.735279057859703, "grad_norm": 0.2471062534303578, "learning_rate": 4.893778452200304e-06, "loss": 0.2462, "num_tokens": 1528605993.0, "step": 8014 }, { "epoch": 2.7356204130397677, "grad_norm": 0.24557592061050706, "learning_rate": 4.88745574102175e-06, "loss": 0.2173, "num_tokens": 1528807384.0, "step": 8015 }, { "epoch": 2.7359617682198327, "grad_norm": 0.2294894173648543, "learning_rate": 4.881133029843197e-06, "loss": 0.2144, "num_tokens": 1528993721.0, "step": 8016 }, { "epoch": 2.736303123399898, "grad_norm": 0.2526729616079106, "learning_rate": 4.874810318664644e-06, "loss": 0.2255, "num_tokens": 1529179540.0, "step": 8017 }, { "epoch": 2.7366444785799624, "grad_norm": 0.2082194188900779, "learning_rate": 4.86848760748609e-06, "loss": 0.2478, "num_tokens": 1529416486.0, "step": 8018 }, { "epoch": 2.7369858337600275, "grad_norm": 0.28127834156769, "learning_rate": 4.862164896307537e-06, "loss": 0.2125, "num_tokens": 1529576626.0, "step": 8019 }, { "epoch": 2.737327188940092, "grad_norm": 0.22176755505927948, "learning_rate": 4.855842185128984e-06, "loss": 0.2342, "num_tokens": 1529792577.0, "step": 8020 }, { "epoch": 2.737668544120157, "grad_norm": 0.2196682682820878, "learning_rate": 4.84951947395043e-06, "loss": 0.2374, "num_tokens": 1530005065.0, "step": 8021 }, { "epoch": 2.7380098993002218, "grad_norm": 0.23193750704319688, "learning_rate": 4.843196762771877e-06, "loss": 0.2166, "num_tokens": 1530201954.0, "step": 8022 }, { "epoch": 2.738351254480287, "grad_norm": 0.25137173727783835, "learning_rate": 4.836874051593324e-06, "loss": 0.2356, "num_tokens": 1530384502.0, "step": 8023 }, { "epoch": 2.7386926096603514, "grad_norm": 0.21796583922683627, "learning_rate": 4.83055134041477e-06, "loss": 0.2206, "num_tokens": 1530588690.0, "step": 8024 }, { "epoch": 2.7390339648404165, "grad_norm": 0.21609702713012355, "learning_rate": 4.824228629236217e-06, "loss": 0.2399, "num_tokens": 1530792092.0, "step": 8025 }, { "epoch": 2.739375320020481, "grad_norm": 0.2420322750890476, "learning_rate": 4.817905918057663e-06, "loss": 0.2475, "num_tokens": 1530984643.0, "step": 8026 }, { "epoch": 2.739716675200546, "grad_norm": 0.2164247092523772, "learning_rate": 4.811583206879109e-06, "loss": 0.2592, "num_tokens": 1531220587.0, "step": 8027 }, { "epoch": 2.7400580303806112, "grad_norm": 0.23079962334334192, "learning_rate": 4.805260495700556e-06, "loss": 0.2277, "num_tokens": 1531419668.0, "step": 8028 }, { "epoch": 2.740399385560676, "grad_norm": 0.2548243612780488, "learning_rate": 4.798937784522003e-06, "loss": 0.2017, "num_tokens": 1531574694.0, "step": 8029 }, { "epoch": 2.7407407407407405, "grad_norm": 0.21392367126143047, "learning_rate": 4.7926150733434494e-06, "loss": 0.2284, "num_tokens": 1531788512.0, "step": 8030 }, { "epoch": 2.7410820959208055, "grad_norm": 0.22922284992106218, "learning_rate": 4.786292362164896e-06, "loss": 0.2286, "num_tokens": 1531988088.0, "step": 8031 }, { "epoch": 2.7414234511008706, "grad_norm": 0.2562451089831053, "learning_rate": 4.779969650986343e-06, "loss": 0.2, "num_tokens": 1532184269.0, "step": 8032 }, { "epoch": 2.741764806280935, "grad_norm": 0.2680739068892341, "learning_rate": 4.7736469398077895e-06, "loss": 0.226, "num_tokens": 1532344908.0, "step": 8033 }, { "epoch": 2.7421061614610003, "grad_norm": 0.23138224504220573, "learning_rate": 4.7673242286292365e-06, "loss": 0.2051, "num_tokens": 1532512801.0, "step": 8034 }, { "epoch": 2.742447516641065, "grad_norm": 0.22456150617050635, "learning_rate": 4.7610015174506835e-06, "loss": 0.2202, "num_tokens": 1532696317.0, "step": 8035 }, { "epoch": 2.74278887182113, "grad_norm": 0.22511056983440894, "learning_rate": 4.75467880627213e-06, "loss": 0.2255, "num_tokens": 1532885045.0, "step": 8036 }, { "epoch": 2.7431302270011946, "grad_norm": 0.2431062703219955, "learning_rate": 4.748356095093577e-06, "loss": 0.2241, "num_tokens": 1533049743.0, "step": 8037 }, { "epoch": 2.7434715821812596, "grad_norm": 0.23987554069404537, "learning_rate": 4.742033383915024e-06, "loss": 0.2304, "num_tokens": 1533232639.0, "step": 8038 }, { "epoch": 2.7438129373613247, "grad_norm": 0.24976980791745285, "learning_rate": 4.73571067273647e-06, "loss": 0.2241, "num_tokens": 1533413066.0, "step": 8039 }, { "epoch": 2.7441542925413893, "grad_norm": 0.21820845198085553, "learning_rate": 4.729387961557917e-06, "loss": 0.2203, "num_tokens": 1533607735.0, "step": 8040 }, { "epoch": 2.744495647721454, "grad_norm": 0.24939836795297823, "learning_rate": 4.723065250379363e-06, "loss": 0.2291, "num_tokens": 1533781169.0, "step": 8041 }, { "epoch": 2.744837002901519, "grad_norm": 0.22794189265810672, "learning_rate": 4.716742539200809e-06, "loss": 0.2346, "num_tokens": 1533984417.0, "step": 8042 }, { "epoch": 2.745178358081584, "grad_norm": 0.2543757665588318, "learning_rate": 4.710419828022256e-06, "loss": 0.2249, "num_tokens": 1534165550.0, "step": 8043 }, { "epoch": 2.7455197132616487, "grad_norm": 0.2539447056089071, "learning_rate": 4.704097116843703e-06, "loss": 0.2217, "num_tokens": 1534327698.0, "step": 8044 }, { "epoch": 2.7458610684417137, "grad_norm": 0.23191824573176084, "learning_rate": 4.697774405665149e-06, "loss": 0.1906, "num_tokens": 1534499676.0, "step": 8045 }, { "epoch": 2.7462024236217784, "grad_norm": 0.22697689327147136, "learning_rate": 4.691451694486596e-06, "loss": 0.2233, "num_tokens": 1534699226.0, "step": 8046 }, { "epoch": 2.7465437788018434, "grad_norm": 0.26866824028030356, "learning_rate": 4.685128983308042e-06, "loss": 0.2309, "num_tokens": 1534892171.0, "step": 8047 }, { "epoch": 2.746885133981908, "grad_norm": 0.24769830167914136, "learning_rate": 4.678806272129489e-06, "loss": 0.1954, "num_tokens": 1535057086.0, "step": 8048 }, { "epoch": 2.747226489161973, "grad_norm": 0.22268883320737287, "learning_rate": 4.672483560950936e-06, "loss": 0.2217, "num_tokens": 1535261817.0, "step": 8049 }, { "epoch": 2.747567844342038, "grad_norm": 0.249534266387876, "learning_rate": 4.666160849772382e-06, "loss": 0.2217, "num_tokens": 1535411874.0, "step": 8050 }, { "epoch": 2.7479091995221028, "grad_norm": 0.22378758589019235, "learning_rate": 4.659838138593829e-06, "loss": 0.2182, "num_tokens": 1535611276.0, "step": 8051 }, { "epoch": 2.7482505547021674, "grad_norm": 0.23708902173587454, "learning_rate": 4.653515427415276e-06, "loss": 0.2168, "num_tokens": 1535795948.0, "step": 8052 }, { "epoch": 2.7485919098822325, "grad_norm": 0.22795425804186892, "learning_rate": 4.647192716236722e-06, "loss": 0.2256, "num_tokens": 1535987444.0, "step": 8053 }, { "epoch": 2.7489332650622975, "grad_norm": 0.23576086808720342, "learning_rate": 4.640870005058169e-06, "loss": 0.2088, "num_tokens": 1536167919.0, "step": 8054 }, { "epoch": 2.749274620242362, "grad_norm": 0.2309484181246441, "learning_rate": 4.634547293879616e-06, "loss": 0.215, "num_tokens": 1536361991.0, "step": 8055 }, { "epoch": 2.749615975422427, "grad_norm": 0.21685109183327286, "learning_rate": 4.6282245827010625e-06, "loss": 0.2108, "num_tokens": 1536558184.0, "step": 8056 }, { "epoch": 2.749957330602492, "grad_norm": 0.22272850718888862, "learning_rate": 4.621901871522509e-06, "loss": 0.2224, "num_tokens": 1536789159.0, "step": 8057 }, { "epoch": 2.750298685782557, "grad_norm": 0.22344701304075665, "learning_rate": 4.615579160343956e-06, "loss": 0.2173, "num_tokens": 1537004949.0, "step": 8058 }, { "epoch": 2.7506400409626215, "grad_norm": 0.2347741851381581, "learning_rate": 4.609256449165402e-06, "loss": 0.238, "num_tokens": 1537216409.0, "step": 8059 }, { "epoch": 2.7509813961426866, "grad_norm": 0.1892856054688313, "learning_rate": 4.602933737986849e-06, "loss": 0.224, "num_tokens": 1537467070.0, "step": 8060 }, { "epoch": 2.751322751322751, "grad_norm": 0.21850273144922838, "learning_rate": 4.596611026808296e-06, "loss": 0.2201, "num_tokens": 1537676422.0, "step": 8061 }, { "epoch": 2.7516641065028162, "grad_norm": 0.2436080561650763, "learning_rate": 4.590288315629742e-06, "loss": 0.2182, "num_tokens": 1537866047.0, "step": 8062 }, { "epoch": 2.752005461682881, "grad_norm": 0.2289807700015295, "learning_rate": 4.583965604451189e-06, "loss": 0.235, "num_tokens": 1538072836.0, "step": 8063 }, { "epoch": 2.752346816862946, "grad_norm": 0.2225865676136848, "learning_rate": 4.577642893272636e-06, "loss": 0.2516, "num_tokens": 1538296417.0, "step": 8064 }, { "epoch": 2.752688172043011, "grad_norm": 0.21563930638922643, "learning_rate": 4.571320182094082e-06, "loss": 0.2293, "num_tokens": 1538501064.0, "step": 8065 }, { "epoch": 2.7530295272230756, "grad_norm": 0.21816256023647923, "learning_rate": 4.564997470915529e-06, "loss": 0.2484, "num_tokens": 1538748179.0, "step": 8066 }, { "epoch": 2.75337088240314, "grad_norm": 0.23183863603409524, "learning_rate": 4.558674759736976e-06, "loss": 0.2401, "num_tokens": 1538948708.0, "step": 8067 }, { "epoch": 2.7537122375832053, "grad_norm": 0.22015024162285574, "learning_rate": 4.552352048558422e-06, "loss": 0.2336, "num_tokens": 1539173077.0, "step": 8068 }, { "epoch": 2.7540535927632703, "grad_norm": 0.2513787862215842, "learning_rate": 4.546029337379869e-06, "loss": 0.2436, "num_tokens": 1539361493.0, "step": 8069 }, { "epoch": 2.754394947943335, "grad_norm": 0.22049986349219497, "learning_rate": 4.539706626201316e-06, "loss": 0.2209, "num_tokens": 1539559362.0, "step": 8070 }, { "epoch": 2.7547363031234, "grad_norm": 0.252341267226495, "learning_rate": 4.533383915022762e-06, "loss": 0.2058, "num_tokens": 1539735392.0, "step": 8071 }, { "epoch": 2.7550776583034646, "grad_norm": 0.22530101424766033, "learning_rate": 4.527061203844208e-06, "loss": 0.2464, "num_tokens": 1539939892.0, "step": 8072 }, { "epoch": 2.7554190134835297, "grad_norm": 0.2669219827033012, "learning_rate": 4.520738492665655e-06, "loss": 0.2182, "num_tokens": 1540120577.0, "step": 8073 }, { "epoch": 2.7557603686635943, "grad_norm": 0.2230636846713324, "learning_rate": 4.5144157814871014e-06, "loss": 0.2359, "num_tokens": 1540323825.0, "step": 8074 }, { "epoch": 2.7561017238436594, "grad_norm": 0.21385544988170213, "learning_rate": 4.508093070308548e-06, "loss": 0.2524, "num_tokens": 1540559906.0, "step": 8075 }, { "epoch": 2.7564430790237244, "grad_norm": 0.26017785095981455, "learning_rate": 4.501770359129995e-06, "loss": 0.2118, "num_tokens": 1540716152.0, "step": 8076 }, { "epoch": 2.756784434203789, "grad_norm": 0.2503776021889231, "learning_rate": 4.4954476479514415e-06, "loss": 0.2086, "num_tokens": 1540878145.0, "step": 8077 }, { "epoch": 2.7571257893838537, "grad_norm": 0.2148631909321196, "learning_rate": 4.4891249367728885e-06, "loss": 0.2086, "num_tokens": 1541079780.0, "step": 8078 }, { "epoch": 2.7574671445639187, "grad_norm": 0.22440428391442793, "learning_rate": 4.4828022255943355e-06, "loss": 0.2164, "num_tokens": 1541284350.0, "step": 8079 }, { "epoch": 2.757808499743984, "grad_norm": 0.2427895998556008, "learning_rate": 4.476479514415782e-06, "loss": 0.2254, "num_tokens": 1541478746.0, "step": 8080 }, { "epoch": 2.7581498549240484, "grad_norm": 0.2425848401529296, "learning_rate": 4.470156803237229e-06, "loss": 0.2121, "num_tokens": 1541663687.0, "step": 8081 }, { "epoch": 2.7584912101041135, "grad_norm": 0.32047947187322756, "learning_rate": 4.463834092058675e-06, "loss": 0.2392, "num_tokens": 1541845076.0, "step": 8082 }, { "epoch": 2.758832565284178, "grad_norm": 0.2503853381982205, "learning_rate": 4.457511380880122e-06, "loss": 0.2009, "num_tokens": 1542011989.0, "step": 8083 }, { "epoch": 2.759173920464243, "grad_norm": 0.23540415744257775, "learning_rate": 4.451188669701569e-06, "loss": 0.2293, "num_tokens": 1542192447.0, "step": 8084 }, { "epoch": 2.7595152756443078, "grad_norm": 0.2397322424940664, "learning_rate": 4.444865958523015e-06, "loss": 0.2317, "num_tokens": 1542395740.0, "step": 8085 }, { "epoch": 2.759856630824373, "grad_norm": 0.22107650825052036, "learning_rate": 4.438543247344462e-06, "loss": 0.2461, "num_tokens": 1542622963.0, "step": 8086 }, { "epoch": 2.760197986004438, "grad_norm": 0.2649415294663349, "learning_rate": 4.432220536165908e-06, "loss": 0.2156, "num_tokens": 1542800974.0, "step": 8087 }, { "epoch": 2.7605393411845025, "grad_norm": 0.2332211036987627, "learning_rate": 4.425897824987354e-06, "loss": 0.2402, "num_tokens": 1542999783.0, "step": 8088 }, { "epoch": 2.760880696364567, "grad_norm": 0.22018424711363607, "learning_rate": 4.419575113808801e-06, "loss": 0.219, "num_tokens": 1543217215.0, "step": 8089 }, { "epoch": 2.761222051544632, "grad_norm": 0.2568043926606068, "learning_rate": 4.413252402630248e-06, "loss": 0.2137, "num_tokens": 1543392306.0, "step": 8090 }, { "epoch": 2.7615634067246972, "grad_norm": 0.22488349445842504, "learning_rate": 4.406929691451694e-06, "loss": 0.2535, "num_tokens": 1543627455.0, "step": 8091 }, { "epoch": 2.761904761904762, "grad_norm": 0.252075894740346, "learning_rate": 4.400606980273141e-06, "loss": 0.2279, "num_tokens": 1543811209.0, "step": 8092 }, { "epoch": 2.762246117084827, "grad_norm": 0.23221955586851542, "learning_rate": 4.394284269094588e-06, "loss": 0.2238, "num_tokens": 1543993394.0, "step": 8093 }, { "epoch": 2.7625874722648915, "grad_norm": 0.21311328225389164, "learning_rate": 4.387961557916034e-06, "loss": 0.2405, "num_tokens": 1544223017.0, "step": 8094 }, { "epoch": 2.7629288274449566, "grad_norm": 0.22025340409078167, "learning_rate": 4.381638846737481e-06, "loss": 0.2292, "num_tokens": 1544466351.0, "step": 8095 }, { "epoch": 2.763270182625021, "grad_norm": 0.21929419859148291, "learning_rate": 4.375316135558928e-06, "loss": 0.2328, "num_tokens": 1544654907.0, "step": 8096 }, { "epoch": 2.7636115378050863, "grad_norm": 0.2409645417402925, "learning_rate": 4.368993424380374e-06, "loss": 0.2125, "num_tokens": 1544829911.0, "step": 8097 }, { "epoch": 2.763952892985151, "grad_norm": 0.23023223870849374, "learning_rate": 4.362670713201821e-06, "loss": 0.2247, "num_tokens": 1545030792.0, "step": 8098 }, { "epoch": 2.764294248165216, "grad_norm": 0.2232238987272846, "learning_rate": 4.356348002023268e-06, "loss": 0.2235, "num_tokens": 1545210336.0, "step": 8099 }, { "epoch": 2.7646356033452806, "grad_norm": 0.25754777081419744, "learning_rate": 4.3500252908447145e-06, "loss": 0.2376, "num_tokens": 1545364968.0, "step": 8100 }, { "epoch": 2.7649769585253456, "grad_norm": 0.22794746767603646, "learning_rate": 4.3437025796661615e-06, "loss": 0.2233, "num_tokens": 1545541723.0, "step": 8101 }, { "epoch": 2.7653183137054107, "grad_norm": 0.22164034170595232, "learning_rate": 4.337379868487608e-06, "loss": 0.2211, "num_tokens": 1545740263.0, "step": 8102 }, { "epoch": 2.7656596688854753, "grad_norm": 0.23202497767093513, "learning_rate": 4.331057157309054e-06, "loss": 0.2223, "num_tokens": 1545924142.0, "step": 8103 }, { "epoch": 2.76600102406554, "grad_norm": 0.25934449140818616, "learning_rate": 4.324734446130501e-06, "loss": 0.2225, "num_tokens": 1546086629.0, "step": 8104 }, { "epoch": 2.766342379245605, "grad_norm": 0.21192448356118754, "learning_rate": 4.318411734951948e-06, "loss": 0.2256, "num_tokens": 1546304960.0, "step": 8105 }, { "epoch": 2.76668373442567, "grad_norm": 0.23339263337302676, "learning_rate": 4.312089023773394e-06, "loss": 0.2349, "num_tokens": 1546499926.0, "step": 8106 }, { "epoch": 2.7670250896057347, "grad_norm": 0.24351041051680608, "learning_rate": 4.305766312594841e-06, "loss": 0.2303, "num_tokens": 1546689480.0, "step": 8107 }, { "epoch": 2.7673664447857997, "grad_norm": 0.21632340631717567, "learning_rate": 4.299443601416288e-06, "loss": 0.2276, "num_tokens": 1546904718.0, "step": 8108 }, { "epoch": 2.7677077999658644, "grad_norm": 0.22447204546767635, "learning_rate": 4.293120890237734e-06, "loss": 0.2154, "num_tokens": 1547135055.0, "step": 8109 }, { "epoch": 2.7680491551459294, "grad_norm": 0.2157096588252174, "learning_rate": 4.286798179059181e-06, "loss": 0.2335, "num_tokens": 1547367831.0, "step": 8110 }, { "epoch": 2.768390510325994, "grad_norm": 0.21624490869070265, "learning_rate": 4.280475467880628e-06, "loss": 0.2124, "num_tokens": 1547573968.0, "step": 8111 }, { "epoch": 2.768731865506059, "grad_norm": 0.24637575755633412, "learning_rate": 4.274152756702074e-06, "loss": 0.2131, "num_tokens": 1547746209.0, "step": 8112 }, { "epoch": 2.769073220686124, "grad_norm": 0.22361431353143527, "learning_rate": 4.267830045523521e-06, "loss": 0.2262, "num_tokens": 1547947980.0, "step": 8113 }, { "epoch": 2.7694145758661888, "grad_norm": 0.23118438859759363, "learning_rate": 4.261507334344967e-06, "loss": 0.2317, "num_tokens": 1548150788.0, "step": 8114 }, { "epoch": 2.7697559310462534, "grad_norm": 0.22112020330043353, "learning_rate": 4.255184623166414e-06, "loss": 0.2547, "num_tokens": 1548364739.0, "step": 8115 }, { "epoch": 2.7700972862263185, "grad_norm": 0.23804318109882333, "learning_rate": 4.24886191198786e-06, "loss": 0.2402, "num_tokens": 1548554657.0, "step": 8116 }, { "epoch": 2.7704386414063835, "grad_norm": 0.2391285936805321, "learning_rate": 4.242539200809307e-06, "loss": 0.2328, "num_tokens": 1548736544.0, "step": 8117 }, { "epoch": 2.770779996586448, "grad_norm": 0.2389947860984895, "learning_rate": 4.2362164896307534e-06, "loss": 0.2078, "num_tokens": 1548902344.0, "step": 8118 }, { "epoch": 2.771121351766513, "grad_norm": 0.2412004095797822, "learning_rate": 4.2298937784522004e-06, "loss": 0.2254, "num_tokens": 1549105672.0, "step": 8119 }, { "epoch": 2.771462706946578, "grad_norm": 0.20876453661397706, "learning_rate": 4.2235710672736466e-06, "loss": 0.2485, "num_tokens": 1549346856.0, "step": 8120 }, { "epoch": 2.771804062126643, "grad_norm": 0.2384409252591195, "learning_rate": 4.2172483560950935e-06, "loss": 0.2298, "num_tokens": 1549535156.0, "step": 8121 }, { "epoch": 2.7721454173067075, "grad_norm": 0.20073250980070415, "learning_rate": 4.2109256449165405e-06, "loss": 0.2341, "num_tokens": 1549774991.0, "step": 8122 }, { "epoch": 2.7724867724867726, "grad_norm": 0.21270680773357084, "learning_rate": 4.204602933737987e-06, "loss": 0.2217, "num_tokens": 1549989672.0, "step": 8123 }, { "epoch": 2.7728281276668376, "grad_norm": 0.25624027778112357, "learning_rate": 4.198280222559434e-06, "loss": 0.2227, "num_tokens": 1550158015.0, "step": 8124 }, { "epoch": 2.7731694828469022, "grad_norm": 0.24403318079977537, "learning_rate": 4.191957511380881e-06, "loss": 0.2411, "num_tokens": 1550358230.0, "step": 8125 }, { "epoch": 2.773510838026967, "grad_norm": 0.23831079056832855, "learning_rate": 4.185634800202327e-06, "loss": 0.2402, "num_tokens": 1550566209.0, "step": 8126 }, { "epoch": 2.773852193207032, "grad_norm": 0.21352343759791095, "learning_rate": 4.179312089023774e-06, "loss": 0.2434, "num_tokens": 1550791175.0, "step": 8127 }, { "epoch": 2.774193548387097, "grad_norm": 0.2198513801926543, "learning_rate": 4.172989377845221e-06, "loss": 0.235, "num_tokens": 1551015529.0, "step": 8128 }, { "epoch": 2.7745349035671616, "grad_norm": 0.24036139010141294, "learning_rate": 4.166666666666667e-06, "loss": 0.213, "num_tokens": 1551183477.0, "step": 8129 }, { "epoch": 2.7748762587472267, "grad_norm": 0.21597447931547367, "learning_rate": 4.160343955488114e-06, "loss": 0.2322, "num_tokens": 1551405945.0, "step": 8130 }, { "epoch": 2.7752176139272913, "grad_norm": 0.21358957645227764, "learning_rate": 4.15402124430956e-06, "loss": 0.2573, "num_tokens": 1551639062.0, "step": 8131 }, { "epoch": 2.7755589691073563, "grad_norm": 0.23889841650211324, "learning_rate": 4.147698533131007e-06, "loss": 0.221, "num_tokens": 1551814750.0, "step": 8132 }, { "epoch": 2.775900324287421, "grad_norm": 0.22665182497586586, "learning_rate": 4.141375821952453e-06, "loss": 0.2183, "num_tokens": 1552017238.0, "step": 8133 }, { "epoch": 2.776241679467486, "grad_norm": 0.2271514917718234, "learning_rate": 4.1350531107739e-06, "loss": 0.1954, "num_tokens": 1552194230.0, "step": 8134 }, { "epoch": 2.7765830346475506, "grad_norm": 0.2703585058393545, "learning_rate": 4.128730399595346e-06, "loss": 0.1825, "num_tokens": 1552332059.0, "step": 8135 }, { "epoch": 2.7769243898276157, "grad_norm": 0.2535276047214926, "learning_rate": 4.122407688416793e-06, "loss": 0.2251, "num_tokens": 1552495795.0, "step": 8136 }, { "epoch": 2.7772657450076803, "grad_norm": 0.24545702312267356, "learning_rate": 4.11608497723824e-06, "loss": 0.2273, "num_tokens": 1552674841.0, "step": 8137 }, { "epoch": 2.7776071001877454, "grad_norm": 0.23724347946306337, "learning_rate": 4.109762266059686e-06, "loss": 0.2301, "num_tokens": 1552881305.0, "step": 8138 }, { "epoch": 2.7779484553678104, "grad_norm": 0.25426366912734655, "learning_rate": 4.103439554881133e-06, "loss": 0.2005, "num_tokens": 1553025925.0, "step": 8139 }, { "epoch": 2.778289810547875, "grad_norm": 0.23042712598472434, "learning_rate": 4.09711684370258e-06, "loss": 0.2454, "num_tokens": 1553222216.0, "step": 8140 }, { "epoch": 2.7786311657279397, "grad_norm": 0.2097720364103634, "learning_rate": 4.0907941325240264e-06, "loss": 0.23, "num_tokens": 1553446662.0, "step": 8141 }, { "epoch": 2.7789725209080047, "grad_norm": 0.2260142726375543, "learning_rate": 4.084471421345473e-06, "loss": 0.1967, "num_tokens": 1553630231.0, "step": 8142 }, { "epoch": 2.77931387608807, "grad_norm": 0.24067120501791212, "learning_rate": 4.07814871016692e-06, "loss": 0.2337, "num_tokens": 1553808814.0, "step": 8143 }, { "epoch": 2.7796552312681344, "grad_norm": 0.24810205765218418, "learning_rate": 4.0718259989883665e-06, "loss": 0.2145, "num_tokens": 1553988308.0, "step": 8144 }, { "epoch": 2.7799965864481995, "grad_norm": 0.22990591247701472, "learning_rate": 4.0655032878098135e-06, "loss": 0.2117, "num_tokens": 1554158938.0, "step": 8145 }, { "epoch": 2.780337941628264, "grad_norm": 0.2178078243399913, "learning_rate": 4.05918057663126e-06, "loss": 0.2638, "num_tokens": 1554395338.0, "step": 8146 }, { "epoch": 2.780679296808329, "grad_norm": 0.22868833293166105, "learning_rate": 4.052857865452707e-06, "loss": 0.2168, "num_tokens": 1554575321.0, "step": 8147 }, { "epoch": 2.7810206519883938, "grad_norm": 0.2317868816850867, "learning_rate": 4.046535154274153e-06, "loss": 0.217, "num_tokens": 1554781991.0, "step": 8148 }, { "epoch": 2.781362007168459, "grad_norm": 0.23391445120429408, "learning_rate": 4.040212443095599e-06, "loss": 0.2323, "num_tokens": 1554974046.0, "step": 8149 }, { "epoch": 2.781703362348524, "grad_norm": 0.26180605495503195, "learning_rate": 4.033889731917046e-06, "loss": 0.2189, "num_tokens": 1555136141.0, "step": 8150 }, { "epoch": 2.7820447175285885, "grad_norm": 0.25564291784017107, "learning_rate": 4.027567020738493e-06, "loss": 0.2196, "num_tokens": 1555329924.0, "step": 8151 }, { "epoch": 2.782386072708653, "grad_norm": 0.2383740605701598, "learning_rate": 4.021244309559939e-06, "loss": 0.2323, "num_tokens": 1555558315.0, "step": 8152 }, { "epoch": 2.782727427888718, "grad_norm": 0.20842267349817276, "learning_rate": 4.014921598381386e-06, "loss": 0.2143, "num_tokens": 1555768595.0, "step": 8153 }, { "epoch": 2.7830687830687832, "grad_norm": 0.23777786244818733, "learning_rate": 4.008598887202833e-06, "loss": 0.2179, "num_tokens": 1555953150.0, "step": 8154 }, { "epoch": 2.783410138248848, "grad_norm": 0.21118437633377138, "learning_rate": 4.002276176024279e-06, "loss": 0.2209, "num_tokens": 1556177006.0, "step": 8155 }, { "epoch": 2.783751493428913, "grad_norm": 0.26743374813127474, "learning_rate": 3.995953464845726e-06, "loss": 0.2107, "num_tokens": 1556321586.0, "step": 8156 }, { "epoch": 2.7840928486089775, "grad_norm": 0.2284554309122559, "learning_rate": 3.989630753667173e-06, "loss": 0.2217, "num_tokens": 1556507070.0, "step": 8157 }, { "epoch": 2.7844342037890426, "grad_norm": 0.23997048876406213, "learning_rate": 3.983308042488619e-06, "loss": 0.2479, "num_tokens": 1556699748.0, "step": 8158 }, { "epoch": 2.7847755589691072, "grad_norm": 0.2327125467774994, "learning_rate": 3.976985331310066e-06, "loss": 0.2028, "num_tokens": 1556867629.0, "step": 8159 }, { "epoch": 2.7851169141491723, "grad_norm": 0.23955674833444984, "learning_rate": 3.970662620131513e-06, "loss": 0.2281, "num_tokens": 1557062170.0, "step": 8160 }, { "epoch": 2.7854582693292373, "grad_norm": 0.23795584920426335, "learning_rate": 3.964339908952959e-06, "loss": 0.2358, "num_tokens": 1557248244.0, "step": 8161 }, { "epoch": 2.785799624509302, "grad_norm": 0.22769188057688758, "learning_rate": 3.9580171977744054e-06, "loss": 0.2064, "num_tokens": 1557420478.0, "step": 8162 }, { "epoch": 2.7861409796893666, "grad_norm": 0.25098448090087055, "learning_rate": 3.9516944865958524e-06, "loss": 0.2476, "num_tokens": 1557628323.0, "step": 8163 }, { "epoch": 2.7864823348694316, "grad_norm": 0.2298823331648334, "learning_rate": 3.9453717754172986e-06, "loss": 0.2192, "num_tokens": 1557821413.0, "step": 8164 }, { "epoch": 2.7868236900494967, "grad_norm": 0.22565014626346877, "learning_rate": 3.9390490642387456e-06, "loss": 0.235, "num_tokens": 1558024758.0, "step": 8165 }, { "epoch": 2.7871650452295613, "grad_norm": 0.23618539203511665, "learning_rate": 3.9327263530601925e-06, "loss": 0.2222, "num_tokens": 1558222740.0, "step": 8166 }, { "epoch": 2.787506400409626, "grad_norm": 0.24769392211403637, "learning_rate": 3.926403641881639e-06, "loss": 0.2349, "num_tokens": 1558402860.0, "step": 8167 }, { "epoch": 2.787847755589691, "grad_norm": 0.25749578082644803, "learning_rate": 3.920080930703086e-06, "loss": 0.2235, "num_tokens": 1558576278.0, "step": 8168 }, { "epoch": 2.788189110769756, "grad_norm": 0.24119751612108697, "learning_rate": 3.913758219524533e-06, "loss": 0.224, "num_tokens": 1558760115.0, "step": 8169 }, { "epoch": 2.7885304659498207, "grad_norm": 0.21316810851406331, "learning_rate": 3.907435508345979e-06, "loss": 0.2331, "num_tokens": 1558982422.0, "step": 8170 }, { "epoch": 2.7888718211298857, "grad_norm": 0.23890574152828628, "learning_rate": 3.901112797167426e-06, "loss": 0.2381, "num_tokens": 1559188362.0, "step": 8171 }, { "epoch": 2.7892131763099504, "grad_norm": 0.23099312130395847, "learning_rate": 3.894790085988873e-06, "loss": 0.2316, "num_tokens": 1559377039.0, "step": 8172 }, { "epoch": 2.7895545314900154, "grad_norm": 0.2118213229541132, "learning_rate": 3.888467374810319e-06, "loss": 0.2254, "num_tokens": 1559583575.0, "step": 8173 }, { "epoch": 2.78989588667008, "grad_norm": 0.24485010660886128, "learning_rate": 3.882144663631766e-06, "loss": 0.2277, "num_tokens": 1559785951.0, "step": 8174 }, { "epoch": 2.790237241850145, "grad_norm": 0.22781520265196253, "learning_rate": 3.875821952453213e-06, "loss": 0.2006, "num_tokens": 1559973435.0, "step": 8175 }, { "epoch": 2.79057859703021, "grad_norm": 0.228757316274315, "learning_rate": 3.869499241274659e-06, "loss": 0.23, "num_tokens": 1560173330.0, "step": 8176 }, { "epoch": 2.790919952210275, "grad_norm": 0.24233094424833385, "learning_rate": 3.863176530096105e-06, "loss": 0.2282, "num_tokens": 1560356413.0, "step": 8177 }, { "epoch": 2.7912613073903394, "grad_norm": 0.22102060918266628, "learning_rate": 3.856853818917552e-06, "loss": 0.2244, "num_tokens": 1560576075.0, "step": 8178 }, { "epoch": 2.7916026625704045, "grad_norm": 0.2351276616798314, "learning_rate": 3.850531107738998e-06, "loss": 0.2327, "num_tokens": 1560763690.0, "step": 8179 }, { "epoch": 2.7919440177504695, "grad_norm": 0.2343019769435166, "learning_rate": 3.844208396560445e-06, "loss": 0.2108, "num_tokens": 1560946786.0, "step": 8180 }, { "epoch": 2.792285372930534, "grad_norm": 0.21828418556137502, "learning_rate": 3.837885685381891e-06, "loss": 0.2385, "num_tokens": 1561183147.0, "step": 8181 }, { "epoch": 2.792626728110599, "grad_norm": 0.25170776122238664, "learning_rate": 3.831562974203338e-06, "loss": 0.2415, "num_tokens": 1561356914.0, "step": 8182 }, { "epoch": 2.792968083290664, "grad_norm": 0.2321505187076624, "learning_rate": 3.825240263024785e-06, "loss": 0.2259, "num_tokens": 1561551315.0, "step": 8183 }, { "epoch": 2.793309438470729, "grad_norm": 0.24873464051185812, "learning_rate": 3.8189175518462315e-06, "loss": 0.2068, "num_tokens": 1561744361.0, "step": 8184 }, { "epoch": 2.7936507936507935, "grad_norm": 0.23454624532919316, "learning_rate": 3.8125948406676784e-06, "loss": 0.2422, "num_tokens": 1561971753.0, "step": 8185 }, { "epoch": 2.7939921488308586, "grad_norm": 0.24684595745615953, "learning_rate": 3.8062721294891254e-06, "loss": 0.2421, "num_tokens": 1562161243.0, "step": 8186 }, { "epoch": 2.7943335040109236, "grad_norm": 0.2460122357179669, "learning_rate": 3.7999494183105716e-06, "loss": 0.1967, "num_tokens": 1562324018.0, "step": 8187 }, { "epoch": 2.7946748591909882, "grad_norm": 0.5055107976305818, "learning_rate": 3.7936267071320185e-06, "loss": 0.2246, "num_tokens": 1562510081.0, "step": 8188 }, { "epoch": 2.795016214371053, "grad_norm": 0.2440618723773121, "learning_rate": 3.787303995953465e-06, "loss": 0.2317, "num_tokens": 1562714128.0, "step": 8189 }, { "epoch": 2.795357569551118, "grad_norm": 0.22278801833960343, "learning_rate": 3.7809812847749112e-06, "loss": 0.2209, "num_tokens": 1562908885.0, "step": 8190 }, { "epoch": 2.795698924731183, "grad_norm": 0.24474646120735385, "learning_rate": 3.7746585735963582e-06, "loss": 0.2265, "num_tokens": 1563093875.0, "step": 8191 }, { "epoch": 2.7960402799112476, "grad_norm": 0.255216548183637, "learning_rate": 3.768335862417805e-06, "loss": 0.2231, "num_tokens": 1563245894.0, "step": 8192 }, { "epoch": 2.7963816350913127, "grad_norm": 0.23492609843201137, "learning_rate": 3.7620131512392513e-06, "loss": 0.2335, "num_tokens": 1563457077.0, "step": 8193 }, { "epoch": 2.7967229902713773, "grad_norm": 0.22616319535893406, "learning_rate": 3.7556904400606983e-06, "loss": 0.2365, "num_tokens": 1563651606.0, "step": 8194 }, { "epoch": 2.7970643454514423, "grad_norm": 0.2337545533289057, "learning_rate": 3.749367728882145e-06, "loss": 0.2241, "num_tokens": 1563819066.0, "step": 8195 }, { "epoch": 2.797405700631507, "grad_norm": 0.24590581138332865, "learning_rate": 3.7430450177035914e-06, "loss": 0.2303, "num_tokens": 1564022814.0, "step": 8196 }, { "epoch": 2.797747055811572, "grad_norm": 0.22685822790072036, "learning_rate": 3.736722306525038e-06, "loss": 0.2368, "num_tokens": 1564226325.0, "step": 8197 }, { "epoch": 2.7980884109916366, "grad_norm": 0.23719144239344958, "learning_rate": 3.730399595346485e-06, "loss": 0.2358, "num_tokens": 1564424285.0, "step": 8198 }, { "epoch": 2.7984297661717017, "grad_norm": 0.2282629674480755, "learning_rate": 3.724076884167931e-06, "loss": 0.2144, "num_tokens": 1564586335.0, "step": 8199 }, { "epoch": 2.7987711213517663, "grad_norm": 0.27917801318703, "learning_rate": 3.717754172989378e-06, "loss": 0.2076, "num_tokens": 1564720471.0, "step": 8200 }, { "epoch": 2.7991124765318314, "grad_norm": 0.24234131009380533, "learning_rate": 3.711431461810825e-06, "loss": 0.2308, "num_tokens": 1564903817.0, "step": 8201 }, { "epoch": 2.7994538317118964, "grad_norm": 0.2339479743763533, "learning_rate": 3.7051087506322712e-06, "loss": 0.2316, "num_tokens": 1565098920.0, "step": 8202 }, { "epoch": 2.799795186891961, "grad_norm": 0.2337826067999606, "learning_rate": 3.6987860394537178e-06, "loss": 0.2007, "num_tokens": 1565271320.0, "step": 8203 }, { "epoch": 2.8001365420720257, "grad_norm": 0.2455314617078208, "learning_rate": 3.6924633282751648e-06, "loss": 0.2279, "num_tokens": 1565459995.0, "step": 8204 }, { "epoch": 2.8004778972520907, "grad_norm": 0.22506605318931072, "learning_rate": 3.686140617096611e-06, "loss": 0.2423, "num_tokens": 1565677605.0, "step": 8205 }, { "epoch": 2.800819252432156, "grad_norm": 0.2465471690213832, "learning_rate": 3.679817905918058e-06, "loss": 0.2273, "num_tokens": 1565871041.0, "step": 8206 }, { "epoch": 2.8011606076122204, "grad_norm": 0.2325772302957442, "learning_rate": 3.673495194739505e-06, "loss": 0.2472, "num_tokens": 1566081292.0, "step": 8207 }, { "epoch": 2.8015019627922855, "grad_norm": 0.24818767313136275, "learning_rate": 3.667172483560951e-06, "loss": 0.2522, "num_tokens": 1566266872.0, "step": 8208 }, { "epoch": 2.80184331797235, "grad_norm": 0.24877887802295198, "learning_rate": 3.660849772382398e-06, "loss": 0.2256, "num_tokens": 1566438286.0, "step": 8209 }, { "epoch": 2.802184673152415, "grad_norm": 0.2412483851442655, "learning_rate": 3.6545270612038445e-06, "loss": 0.2521, "num_tokens": 1566655151.0, "step": 8210 }, { "epoch": 2.8025260283324798, "grad_norm": 0.23859947965555708, "learning_rate": 3.648204350025291e-06, "loss": 0.2253, "num_tokens": 1566831892.0, "step": 8211 }, { "epoch": 2.802867383512545, "grad_norm": 0.2480201705630165, "learning_rate": 3.6418816388467377e-06, "loss": 0.2518, "num_tokens": 1567018617.0, "step": 8212 }, { "epoch": 2.80320873869261, "grad_norm": 0.22658541796198337, "learning_rate": 3.635558927668184e-06, "loss": 0.2304, "num_tokens": 1567223157.0, "step": 8213 }, { "epoch": 2.8035500938726745, "grad_norm": 0.23307517954463766, "learning_rate": 3.6292362164896308e-06, "loss": 0.2059, "num_tokens": 1567431601.0, "step": 8214 }, { "epoch": 2.803891449052739, "grad_norm": 0.24170993482864256, "learning_rate": 3.6229135053110778e-06, "loss": 0.2316, "num_tokens": 1567610589.0, "step": 8215 }, { "epoch": 2.804232804232804, "grad_norm": 0.21923460375344933, "learning_rate": 3.616590794132524e-06, "loss": 0.2156, "num_tokens": 1567810996.0, "step": 8216 }, { "epoch": 2.8045741594128692, "grad_norm": 0.21225088121001953, "learning_rate": 3.610268082953971e-06, "loss": 0.2233, "num_tokens": 1568025772.0, "step": 8217 }, { "epoch": 2.804915514592934, "grad_norm": 0.25521103738005024, "learning_rate": 3.6039453717754174e-06, "loss": 0.2244, "num_tokens": 1568239271.0, "step": 8218 }, { "epoch": 2.805256869772999, "grad_norm": 0.23522002439492248, "learning_rate": 3.597622660596864e-06, "loss": 0.2125, "num_tokens": 1568412381.0, "step": 8219 }, { "epoch": 2.8055982249530635, "grad_norm": 0.25646552406191764, "learning_rate": 3.5912999494183106e-06, "loss": 0.2286, "num_tokens": 1568578187.0, "step": 8220 }, { "epoch": 2.8059395801331286, "grad_norm": 0.2376013381059346, "learning_rate": 3.5849772382397575e-06, "loss": 0.2099, "num_tokens": 1568736750.0, "step": 8221 }, { "epoch": 2.8062809353131932, "grad_norm": 0.1969618314250152, "learning_rate": 3.5786545270612037e-06, "loss": 0.2222, "num_tokens": 1568985374.0, "step": 8222 }, { "epoch": 2.8066222904932583, "grad_norm": 0.2317446525161942, "learning_rate": 3.5723318158826507e-06, "loss": 0.2325, "num_tokens": 1569216477.0, "step": 8223 }, { "epoch": 2.8069636456733233, "grad_norm": 0.21083392229818013, "learning_rate": 3.5660091047040976e-06, "loss": 0.2239, "num_tokens": 1569428197.0, "step": 8224 }, { "epoch": 2.807305000853388, "grad_norm": 0.23325765354192182, "learning_rate": 3.5596863935255438e-06, "loss": 0.2284, "num_tokens": 1569630445.0, "step": 8225 }, { "epoch": 2.8076463560334526, "grad_norm": 0.22274529181998048, "learning_rate": 3.5533636823469903e-06, "loss": 0.2266, "num_tokens": 1569818135.0, "step": 8226 }, { "epoch": 2.8079877112135176, "grad_norm": 0.2361847048969329, "learning_rate": 3.5470409711684373e-06, "loss": 0.2134, "num_tokens": 1570015800.0, "step": 8227 }, { "epoch": 2.8083290663935827, "grad_norm": 0.25898448350638853, "learning_rate": 3.5407182599898835e-06, "loss": 0.2134, "num_tokens": 1570181611.0, "step": 8228 }, { "epoch": 2.8086704215736473, "grad_norm": 0.2322031101103062, "learning_rate": 3.5343955488113304e-06, "loss": 0.2619, "num_tokens": 1570388041.0, "step": 8229 }, { "epoch": 2.8090117767537124, "grad_norm": 0.2528203857235543, "learning_rate": 3.5280728376327774e-06, "loss": 0.2337, "num_tokens": 1570566826.0, "step": 8230 }, { "epoch": 2.809353131933777, "grad_norm": 0.223241072357708, "learning_rate": 3.5217501264542236e-06, "loss": 0.2102, "num_tokens": 1570755669.0, "step": 8231 }, { "epoch": 2.809694487113842, "grad_norm": 0.21676608128636923, "learning_rate": 3.5154274152756705e-06, "loss": 0.2287, "num_tokens": 1570967224.0, "step": 8232 }, { "epoch": 2.8100358422939067, "grad_norm": 0.22784541859762186, "learning_rate": 3.509104704097117e-06, "loss": 0.2374, "num_tokens": 1571150853.0, "step": 8233 }, { "epoch": 2.8103771974739717, "grad_norm": 0.22136879059794687, "learning_rate": 3.5027819929185637e-06, "loss": 0.2276, "num_tokens": 1571343101.0, "step": 8234 }, { "epoch": 2.8107185526540364, "grad_norm": 0.2240010312038391, "learning_rate": 3.4964592817400102e-06, "loss": 0.204, "num_tokens": 1571523300.0, "step": 8235 }, { "epoch": 2.8110599078341014, "grad_norm": 0.22276443321713316, "learning_rate": 3.490136570561457e-06, "loss": 0.223, "num_tokens": 1571733024.0, "step": 8236 }, { "epoch": 2.811401263014166, "grad_norm": 0.21606412185539967, "learning_rate": 3.4838138593829033e-06, "loss": 0.2051, "num_tokens": 1571914877.0, "step": 8237 }, { "epoch": 2.811742618194231, "grad_norm": 0.23539268666696997, "learning_rate": 3.4774911482043503e-06, "loss": 0.2108, "num_tokens": 1572080465.0, "step": 8238 }, { "epoch": 2.812083973374296, "grad_norm": 0.2197418827471345, "learning_rate": 3.4711684370257973e-06, "loss": 0.2347, "num_tokens": 1572299897.0, "step": 8239 }, { "epoch": 2.812425328554361, "grad_norm": 0.22388045281492114, "learning_rate": 3.4648457258472434e-06, "loss": 0.239, "num_tokens": 1572498672.0, "step": 8240 }, { "epoch": 2.8127666837344254, "grad_norm": 0.22341028537089988, "learning_rate": 3.45852301466869e-06, "loss": 0.2142, "num_tokens": 1572704144.0, "step": 8241 }, { "epoch": 2.8131080389144905, "grad_norm": 0.24909546847652897, "learning_rate": 3.452200303490137e-06, "loss": 0.2392, "num_tokens": 1572896204.0, "step": 8242 }, { "epoch": 2.8134493940945555, "grad_norm": 0.24562653978368737, "learning_rate": 3.445877592311583e-06, "loss": 0.2052, "num_tokens": 1573069922.0, "step": 8243 }, { "epoch": 2.81379074927462, "grad_norm": 0.2169555906304942, "learning_rate": 3.43955488113303e-06, "loss": 0.2271, "num_tokens": 1573293168.0, "step": 8244 }, { "epoch": 2.814132104454685, "grad_norm": 0.231641378624678, "learning_rate": 3.4332321699544762e-06, "loss": 0.2302, "num_tokens": 1573493722.0, "step": 8245 }, { "epoch": 2.81447345963475, "grad_norm": 0.2225694234579819, "learning_rate": 3.4269094587759232e-06, "loss": 0.2206, "num_tokens": 1573680767.0, "step": 8246 }, { "epoch": 2.814814814814815, "grad_norm": 0.2071243951569216, "learning_rate": 3.42058674759737e-06, "loss": 0.2324, "num_tokens": 1573914479.0, "step": 8247 }, { "epoch": 2.8151561699948795, "grad_norm": 0.2472319620289022, "learning_rate": 3.4142640364188163e-06, "loss": 0.2477, "num_tokens": 1574132979.0, "step": 8248 }, { "epoch": 2.8154975251749446, "grad_norm": 0.21963509387954452, "learning_rate": 3.407941325240263e-06, "loss": 0.248, "num_tokens": 1574360630.0, "step": 8249 }, { "epoch": 2.8158388803550096, "grad_norm": 0.2274698441530565, "learning_rate": 3.40161861406171e-06, "loss": 0.2102, "num_tokens": 1574563492.0, "step": 8250 }, { "epoch": 2.8161802355350742, "grad_norm": 0.2500554964956681, "learning_rate": 3.395295902883156e-06, "loss": 0.2171, "num_tokens": 1574729668.0, "step": 8251 }, { "epoch": 2.816521590715139, "grad_norm": 0.2600473706384843, "learning_rate": 3.388973191704603e-06, "loss": 0.2141, "num_tokens": 1574898929.0, "step": 8252 }, { "epoch": 2.816862945895204, "grad_norm": 0.24714755933107588, "learning_rate": 3.38265048052605e-06, "loss": 0.2367, "num_tokens": 1575082859.0, "step": 8253 }, { "epoch": 2.817204301075269, "grad_norm": 0.234187505577116, "learning_rate": 3.376327769347496e-06, "loss": 0.2321, "num_tokens": 1575275823.0, "step": 8254 }, { "epoch": 2.8175456562553336, "grad_norm": 0.2357588273511746, "learning_rate": 3.370005058168943e-06, "loss": 0.2216, "num_tokens": 1575458991.0, "step": 8255 }, { "epoch": 2.8178870114353987, "grad_norm": 0.23905712257155518, "learning_rate": 3.3636823469903897e-06, "loss": 0.2377, "num_tokens": 1575653279.0, "step": 8256 }, { "epoch": 2.8182283666154633, "grad_norm": 0.23683502034083312, "learning_rate": 3.3573596358118362e-06, "loss": 0.2058, "num_tokens": 1575833177.0, "step": 8257 }, { "epoch": 2.8185697217955283, "grad_norm": 0.23756141265486258, "learning_rate": 3.3510369246332828e-06, "loss": 0.2287, "num_tokens": 1576007230.0, "step": 8258 }, { "epoch": 2.818911076975593, "grad_norm": 0.22004883231801828, "learning_rate": 3.3447142134547298e-06, "loss": 0.2226, "num_tokens": 1576209434.0, "step": 8259 }, { "epoch": 2.819252432155658, "grad_norm": 0.23207385977628048, "learning_rate": 3.338391502276176e-06, "loss": 0.2152, "num_tokens": 1576401849.0, "step": 8260 }, { "epoch": 2.819593787335723, "grad_norm": 0.2141243276497186, "learning_rate": 3.332068791097623e-06, "loss": 0.2251, "num_tokens": 1576627510.0, "step": 8261 }, { "epoch": 2.8199351425157877, "grad_norm": 0.4159740785413498, "learning_rate": 3.32574607991907e-06, "loss": 0.2271, "num_tokens": 1576804424.0, "step": 8262 }, { "epoch": 2.8202764976958523, "grad_norm": 0.2397693028079161, "learning_rate": 3.319423368740516e-06, "loss": 0.2086, "num_tokens": 1576978394.0, "step": 8263 }, { "epoch": 2.8206178528759174, "grad_norm": 0.23965512263439134, "learning_rate": 3.3131006575619626e-06, "loss": 0.2393, "num_tokens": 1577159142.0, "step": 8264 }, { "epoch": 2.8209592080559824, "grad_norm": 0.2442221356518249, "learning_rate": 3.3067779463834096e-06, "loss": 0.2346, "num_tokens": 1577340254.0, "step": 8265 }, { "epoch": 2.821300563236047, "grad_norm": 0.22842011585384056, "learning_rate": 3.3004552352048557e-06, "loss": 0.2073, "num_tokens": 1577521316.0, "step": 8266 }, { "epoch": 2.821641918416112, "grad_norm": 0.23005564444017204, "learning_rate": 3.2941325240263027e-06, "loss": 0.231, "num_tokens": 1577719782.0, "step": 8267 }, { "epoch": 2.8219832735961767, "grad_norm": 0.25900145963991866, "learning_rate": 3.2878098128477497e-06, "loss": 0.1926, "num_tokens": 1577876268.0, "step": 8268 }, { "epoch": 2.822324628776242, "grad_norm": 0.23289307172074186, "learning_rate": 3.281487101669196e-06, "loss": 0.2255, "num_tokens": 1578055178.0, "step": 8269 }, { "epoch": 2.8226659839563064, "grad_norm": 0.22810824922538728, "learning_rate": 3.2751643904906428e-06, "loss": 0.2107, "num_tokens": 1578260855.0, "step": 8270 }, { "epoch": 2.8230073391363715, "grad_norm": 0.24710237591201734, "learning_rate": 3.2688416793120893e-06, "loss": 0.2234, "num_tokens": 1578437947.0, "step": 8271 }, { "epoch": 2.823348694316436, "grad_norm": 0.22030699458132064, "learning_rate": 3.2625189681335355e-06, "loss": 0.2635, "num_tokens": 1578676931.0, "step": 8272 }, { "epoch": 2.823690049496501, "grad_norm": 0.2499914237042516, "learning_rate": 3.2561962569549825e-06, "loss": 0.2343, "num_tokens": 1578850689.0, "step": 8273 }, { "epoch": 2.8240314046765658, "grad_norm": 0.23065025812978712, "learning_rate": 3.2498735457764294e-06, "loss": 0.2291, "num_tokens": 1579033911.0, "step": 8274 }, { "epoch": 2.824372759856631, "grad_norm": 0.25693560275177746, "learning_rate": 3.2435508345978756e-06, "loss": 0.2497, "num_tokens": 1579239103.0, "step": 8275 }, { "epoch": 2.824714115036696, "grad_norm": 0.21995546188364629, "learning_rate": 3.2372281234193226e-06, "loss": 0.221, "num_tokens": 1579447625.0, "step": 8276 }, { "epoch": 2.8250554702167605, "grad_norm": 0.24838632678672243, "learning_rate": 3.2309054122407695e-06, "loss": 0.2032, "num_tokens": 1579591593.0, "step": 8277 }, { "epoch": 2.825396825396825, "grad_norm": 0.24901842384852052, "learning_rate": 3.2245827010622157e-06, "loss": 0.2114, "num_tokens": 1579756399.0, "step": 8278 }, { "epoch": 2.82573818057689, "grad_norm": 0.24506464596949584, "learning_rate": 3.2182599898836622e-06, "loss": 0.2426, "num_tokens": 1579942256.0, "step": 8279 }, { "epoch": 2.8260795357569553, "grad_norm": 0.24417826464070785, "learning_rate": 3.2119372787051084e-06, "loss": 0.2083, "num_tokens": 1580109095.0, "step": 8280 }, { "epoch": 2.82642089093702, "grad_norm": 0.25059639626081365, "learning_rate": 3.2056145675265554e-06, "loss": 0.2208, "num_tokens": 1580280629.0, "step": 8281 }, { "epoch": 2.826762246117085, "grad_norm": 0.2381563099487908, "learning_rate": 3.1992918563480023e-06, "loss": 0.1978, "num_tokens": 1580444218.0, "step": 8282 }, { "epoch": 2.8271036012971495, "grad_norm": 0.2429050820056715, "learning_rate": 3.1929691451694485e-06, "loss": 0.2201, "num_tokens": 1580635218.0, "step": 8283 }, { "epoch": 2.8274449564772146, "grad_norm": 0.24148657286639683, "learning_rate": 3.1866464339908955e-06, "loss": 0.2187, "num_tokens": 1580808668.0, "step": 8284 }, { "epoch": 2.8277863116572792, "grad_norm": 0.26830631347991196, "learning_rate": 3.1803237228123424e-06, "loss": 0.224, "num_tokens": 1580983631.0, "step": 8285 }, { "epoch": 2.8281276668373443, "grad_norm": 0.21874235642691417, "learning_rate": 3.1740010116337886e-06, "loss": 0.2183, "num_tokens": 1581183824.0, "step": 8286 }, { "epoch": 2.8284690220174094, "grad_norm": 0.2622145035005785, "learning_rate": 3.167678300455235e-06, "loss": 0.2286, "num_tokens": 1581370637.0, "step": 8287 }, { "epoch": 2.828810377197474, "grad_norm": 0.2136105007460132, "learning_rate": 3.161355589276682e-06, "loss": 0.201, "num_tokens": 1581559573.0, "step": 8288 }, { "epoch": 2.8291517323775386, "grad_norm": 0.22046178904198513, "learning_rate": 3.1550328780981283e-06, "loss": 0.2179, "num_tokens": 1581738397.0, "step": 8289 }, { "epoch": 2.8294930875576036, "grad_norm": 0.22932220543985962, "learning_rate": 3.1487101669195752e-06, "loss": 0.2048, "num_tokens": 1581921404.0, "step": 8290 }, { "epoch": 2.8298344427376687, "grad_norm": 0.266147941641552, "learning_rate": 3.1423874557410222e-06, "loss": 0.2407, "num_tokens": 1582105869.0, "step": 8291 }, { "epoch": 2.8301757979177333, "grad_norm": 0.21234710874812338, "learning_rate": 3.1360647445624684e-06, "loss": 0.2271, "num_tokens": 1582321270.0, "step": 8292 }, { "epoch": 2.8305171530977984, "grad_norm": 0.22817273398218516, "learning_rate": 3.1297420333839153e-06, "loss": 0.2089, "num_tokens": 1582496522.0, "step": 8293 }, { "epoch": 2.830858508277863, "grad_norm": 0.21888213329062492, "learning_rate": 3.123419322205362e-06, "loss": 0.2286, "num_tokens": 1582711596.0, "step": 8294 }, { "epoch": 2.831199863457928, "grad_norm": 0.262698331417275, "learning_rate": 3.1170966110268085e-06, "loss": 0.2124, "num_tokens": 1582867946.0, "step": 8295 }, { "epoch": 2.8315412186379927, "grad_norm": 0.23616698896539176, "learning_rate": 3.110773899848255e-06, "loss": 0.2256, "num_tokens": 1583077245.0, "step": 8296 }, { "epoch": 2.8318825738180577, "grad_norm": 0.21402181933013745, "learning_rate": 3.1044511886697016e-06, "loss": 0.2105, "num_tokens": 1583266137.0, "step": 8297 }, { "epoch": 2.832223928998123, "grad_norm": 0.21848174197363424, "learning_rate": 3.0981284774911486e-06, "loss": 0.226, "num_tokens": 1583476639.0, "step": 8298 }, { "epoch": 2.8325652841781874, "grad_norm": 0.24272853830536711, "learning_rate": 3.091805766312595e-06, "loss": 0.2034, "num_tokens": 1583656442.0, "step": 8299 }, { "epoch": 2.832906639358252, "grad_norm": 0.2233240205985993, "learning_rate": 3.0854830551340417e-06, "loss": 0.2066, "num_tokens": 1583838363.0, "step": 8300 }, { "epoch": 2.833247994538317, "grad_norm": 0.27608456895952377, "learning_rate": 3.0791603439554882e-06, "loss": 0.2406, "num_tokens": 1584002092.0, "step": 8301 }, { "epoch": 2.833589349718382, "grad_norm": 0.226302254386494, "learning_rate": 3.072837632776935e-06, "loss": 0.2199, "num_tokens": 1584189344.0, "step": 8302 }, { "epoch": 2.833930704898447, "grad_norm": 0.23755710825356424, "learning_rate": 3.0665149215983814e-06, "loss": 0.2181, "num_tokens": 1584356826.0, "step": 8303 }, { "epoch": 2.834272060078512, "grad_norm": 0.28135738900393586, "learning_rate": 3.060192210419828e-06, "loss": 0.2501, "num_tokens": 1584527749.0, "step": 8304 }, { "epoch": 2.8346134152585765, "grad_norm": 0.21235252232481586, "learning_rate": 3.053869499241275e-06, "loss": 0.2424, "num_tokens": 1584768697.0, "step": 8305 }, { "epoch": 2.8349547704386415, "grad_norm": 0.25482098707014167, "learning_rate": 3.0475467880627215e-06, "loss": 0.2076, "num_tokens": 1584931714.0, "step": 8306 }, { "epoch": 2.835296125618706, "grad_norm": 0.24469706588312687, "learning_rate": 3.041224076884168e-06, "loss": 0.2503, "num_tokens": 1585144998.0, "step": 8307 }, { "epoch": 2.835637480798771, "grad_norm": 0.22740753914973394, "learning_rate": 3.034901365705615e-06, "loss": 0.1983, "num_tokens": 1585329073.0, "step": 8308 }, { "epoch": 2.835978835978836, "grad_norm": 0.244358770898838, "learning_rate": 3.028578654527061e-06, "loss": 0.223, "num_tokens": 1585500709.0, "step": 8309 }, { "epoch": 2.836320191158901, "grad_norm": 0.20633029777172035, "learning_rate": 3.0222559433485077e-06, "loss": 0.2232, "num_tokens": 1585705057.0, "step": 8310 }, { "epoch": 2.8366615463389655, "grad_norm": 0.20531100177979583, "learning_rate": 3.0159332321699547e-06, "loss": 0.2153, "num_tokens": 1585939818.0, "step": 8311 }, { "epoch": 2.8370029015190306, "grad_norm": 0.2537174467938121, "learning_rate": 3.0096105209914012e-06, "loss": 0.2279, "num_tokens": 1586113682.0, "step": 8312 }, { "epoch": 2.8373442566990956, "grad_norm": 0.2360851792469832, "learning_rate": 3.003287809812848e-06, "loss": 0.2043, "num_tokens": 1586290030.0, "step": 8313 }, { "epoch": 2.8376856118791602, "grad_norm": 0.27695323350802104, "learning_rate": 2.9969650986342948e-06, "loss": 0.2075, "num_tokens": 1586433043.0, "step": 8314 }, { "epoch": 2.838026967059225, "grad_norm": 0.23798542772920492, "learning_rate": 2.9906423874557413e-06, "loss": 0.2181, "num_tokens": 1586617656.0, "step": 8315 }, { "epoch": 2.83836832223929, "grad_norm": 0.2277154892349043, "learning_rate": 2.984319676277188e-06, "loss": 0.211, "num_tokens": 1586819180.0, "step": 8316 }, { "epoch": 2.838709677419355, "grad_norm": 0.23100298118682716, "learning_rate": 2.9779969650986345e-06, "loss": 0.2149, "num_tokens": 1586998011.0, "step": 8317 }, { "epoch": 2.8390510325994196, "grad_norm": 0.31342736076548555, "learning_rate": 2.971674253920081e-06, "loss": 0.2335, "num_tokens": 1587148226.0, "step": 8318 }, { "epoch": 2.8393923877794847, "grad_norm": 0.2575008844855071, "learning_rate": 2.9653515427415276e-06, "loss": 0.2111, "num_tokens": 1587294310.0, "step": 8319 }, { "epoch": 2.8397337429595493, "grad_norm": 0.2330125074442659, "learning_rate": 2.959028831562974e-06, "loss": 0.2268, "num_tokens": 1587480264.0, "step": 8320 }, { "epoch": 2.8400750981396143, "grad_norm": 0.25454286250680314, "learning_rate": 2.952706120384421e-06, "loss": 0.2143, "num_tokens": 1587644105.0, "step": 8321 }, { "epoch": 2.840416453319679, "grad_norm": 0.24892808874945743, "learning_rate": 2.9463834092058677e-06, "loss": 0.2142, "num_tokens": 1587821199.0, "step": 8322 }, { "epoch": 2.840757808499744, "grad_norm": 0.27943636129610616, "learning_rate": 2.9400606980273142e-06, "loss": 0.2323, "num_tokens": 1587996695.0, "step": 8323 }, { "epoch": 2.841099163679809, "grad_norm": 0.24576488551474157, "learning_rate": 2.933737986848761e-06, "loss": 0.2319, "num_tokens": 1588185298.0, "step": 8324 }, { "epoch": 2.8414405188598737, "grad_norm": 0.285581879107885, "learning_rate": 2.9274152756702074e-06, "loss": 0.2332, "num_tokens": 1588380733.0, "step": 8325 }, { "epoch": 2.8417818740399383, "grad_norm": 0.23296055662895487, "learning_rate": 2.921092564491654e-06, "loss": 0.2293, "num_tokens": 1588578416.0, "step": 8326 }, { "epoch": 2.8421232292200034, "grad_norm": 0.22395538491233793, "learning_rate": 2.914769853313101e-06, "loss": 0.2066, "num_tokens": 1588779444.0, "step": 8327 }, { "epoch": 2.8424645844000684, "grad_norm": 0.2531756358905132, "learning_rate": 2.9084471421345475e-06, "loss": 0.2337, "num_tokens": 1588979380.0, "step": 8328 }, { "epoch": 2.842805939580133, "grad_norm": 0.23599729024964483, "learning_rate": 2.902124430955994e-06, "loss": 0.249, "num_tokens": 1589199285.0, "step": 8329 }, { "epoch": 2.843147294760198, "grad_norm": 0.23610254727544044, "learning_rate": 2.895801719777441e-06, "loss": 0.2165, "num_tokens": 1589413055.0, "step": 8330 }, { "epoch": 2.8434886499402627, "grad_norm": 0.22694426255958058, "learning_rate": 2.8894790085988876e-06, "loss": 0.2475, "num_tokens": 1589639537.0, "step": 8331 }, { "epoch": 2.843830005120328, "grad_norm": 0.2312942304413501, "learning_rate": 2.8831562974203337e-06, "loss": 0.2266, "num_tokens": 1589837820.0, "step": 8332 }, { "epoch": 2.8441713603003924, "grad_norm": 0.24859635057860158, "learning_rate": 2.8768335862417807e-06, "loss": 0.2176, "num_tokens": 1590015950.0, "step": 8333 }, { "epoch": 2.8445127154804575, "grad_norm": 0.2545756531103387, "learning_rate": 2.8705108750632272e-06, "loss": 0.1974, "num_tokens": 1590173173.0, "step": 8334 }, { "epoch": 2.8448540706605225, "grad_norm": 0.22226326333266702, "learning_rate": 2.864188163884674e-06, "loss": 0.227, "num_tokens": 1590419367.0, "step": 8335 }, { "epoch": 2.845195425840587, "grad_norm": 0.2512632413294132, "learning_rate": 2.8578654527061204e-06, "loss": 0.2463, "num_tokens": 1590589628.0, "step": 8336 }, { "epoch": 2.8455367810206518, "grad_norm": 0.23756388097993092, "learning_rate": 2.8515427415275673e-06, "loss": 0.229, "num_tokens": 1590769622.0, "step": 8337 }, { "epoch": 2.845878136200717, "grad_norm": 0.24536446326455721, "learning_rate": 2.845220030349014e-06, "loss": 0.2152, "num_tokens": 1590932035.0, "step": 8338 }, { "epoch": 2.846219491380782, "grad_norm": 0.24103711919151943, "learning_rate": 2.8388973191704605e-06, "loss": 0.2048, "num_tokens": 1591107973.0, "step": 8339 }, { "epoch": 2.8465608465608465, "grad_norm": 0.2237656329728833, "learning_rate": 2.832574607991907e-06, "loss": 0.2241, "num_tokens": 1591320563.0, "step": 8340 }, { "epoch": 2.8469022017409116, "grad_norm": 0.25194548856801474, "learning_rate": 2.8262518968133536e-06, "loss": 0.2452, "num_tokens": 1591497916.0, "step": 8341 }, { "epoch": 2.847243556920976, "grad_norm": 0.22966526249485525, "learning_rate": 2.8199291856348e-06, "loss": 0.2198, "num_tokens": 1591697654.0, "step": 8342 }, { "epoch": 2.8475849121010413, "grad_norm": 0.25787075445823743, "learning_rate": 2.813606474456247e-06, "loss": 0.2083, "num_tokens": 1591855080.0, "step": 8343 }, { "epoch": 2.847926267281106, "grad_norm": 0.2443386767883454, "learning_rate": 2.8072837632776937e-06, "loss": 0.2299, "num_tokens": 1592037142.0, "step": 8344 }, { "epoch": 2.848267622461171, "grad_norm": 0.2433600653778701, "learning_rate": 2.8009610520991402e-06, "loss": 0.2227, "num_tokens": 1592213075.0, "step": 8345 }, { "epoch": 2.8486089776412356, "grad_norm": 0.20975171562625403, "learning_rate": 2.7946383409205872e-06, "loss": 0.2257, "num_tokens": 1592422513.0, "step": 8346 }, { "epoch": 2.8489503328213006, "grad_norm": 0.25372831272152013, "learning_rate": 2.7883156297420334e-06, "loss": 0.2072, "num_tokens": 1592570338.0, "step": 8347 }, { "epoch": 2.8492916880013652, "grad_norm": 0.22068424073224766, "learning_rate": 2.78199291856348e-06, "loss": 0.2168, "num_tokens": 1592745802.0, "step": 8348 }, { "epoch": 2.8496330431814303, "grad_norm": 0.22567885108984712, "learning_rate": 2.775670207384927e-06, "loss": 0.2204, "num_tokens": 1592926069.0, "step": 8349 }, { "epoch": 2.8499743983614954, "grad_norm": 0.22996921678198737, "learning_rate": 2.7693474962063735e-06, "loss": 0.2111, "num_tokens": 1593125856.0, "step": 8350 }, { "epoch": 2.85031575354156, "grad_norm": 0.2352548107222976, "learning_rate": 2.76302478502782e-06, "loss": 0.217, "num_tokens": 1593327596.0, "step": 8351 }, { "epoch": 2.8506571087216246, "grad_norm": 0.23474458272935944, "learning_rate": 2.7567020738492666e-06, "loss": 0.2363, "num_tokens": 1593541466.0, "step": 8352 }, { "epoch": 2.8509984639016897, "grad_norm": 0.25643486907127416, "learning_rate": 2.7503793626707136e-06, "loss": 0.2216, "num_tokens": 1593708725.0, "step": 8353 }, { "epoch": 2.8513398190817547, "grad_norm": 0.2196446808254112, "learning_rate": 2.74405665149216e-06, "loss": 0.232, "num_tokens": 1593929894.0, "step": 8354 }, { "epoch": 2.8516811742618193, "grad_norm": 0.23633643653686853, "learning_rate": 2.7377339403136063e-06, "loss": 0.2054, "num_tokens": 1594095708.0, "step": 8355 }, { "epoch": 2.8520225294418844, "grad_norm": 0.21937889493536886, "learning_rate": 2.7314112291350532e-06, "loss": 0.2349, "num_tokens": 1594313786.0, "step": 8356 }, { "epoch": 2.852363884621949, "grad_norm": 0.23260936111327382, "learning_rate": 2.7250885179565e-06, "loss": 0.2312, "num_tokens": 1594514059.0, "step": 8357 }, { "epoch": 2.852705239802014, "grad_norm": 0.24241771060054326, "learning_rate": 2.7187658067779464e-06, "loss": 0.262, "num_tokens": 1594712044.0, "step": 8358 }, { "epoch": 2.8530465949820787, "grad_norm": 0.24992967198615884, "learning_rate": 2.7124430955993933e-06, "loss": 0.1937, "num_tokens": 1594880781.0, "step": 8359 }, { "epoch": 2.8533879501621437, "grad_norm": 0.2544544367159949, "learning_rate": 2.70612038442084e-06, "loss": 0.2047, "num_tokens": 1595037669.0, "step": 8360 }, { "epoch": 2.853729305342209, "grad_norm": 0.22678757470675212, "learning_rate": 2.6997976732422865e-06, "loss": 0.2252, "num_tokens": 1595238729.0, "step": 8361 }, { "epoch": 2.8540706605222734, "grad_norm": 0.2120853941829323, "learning_rate": 2.693474962063733e-06, "loss": 0.215, "num_tokens": 1595452847.0, "step": 8362 }, { "epoch": 2.854412015702338, "grad_norm": 0.24265145547824546, "learning_rate": 2.6871522508851796e-06, "loss": 0.2184, "num_tokens": 1595629559.0, "step": 8363 }, { "epoch": 2.854753370882403, "grad_norm": 0.23158009247698083, "learning_rate": 2.680829539706626e-06, "loss": 0.2093, "num_tokens": 1595807831.0, "step": 8364 }, { "epoch": 2.855094726062468, "grad_norm": 0.23295105097326946, "learning_rate": 2.674506828528073e-06, "loss": 0.2357, "num_tokens": 1596000049.0, "step": 8365 }, { "epoch": 2.855436081242533, "grad_norm": 0.23548382282862554, "learning_rate": 2.6681841173495197e-06, "loss": 0.2375, "num_tokens": 1596211669.0, "step": 8366 }, { "epoch": 2.855777436422598, "grad_norm": 0.22447817062915412, "learning_rate": 2.6618614061709662e-06, "loss": 0.2214, "num_tokens": 1596405285.0, "step": 8367 }, { "epoch": 2.8561187916026625, "grad_norm": 0.23975502834228649, "learning_rate": 2.655538694992413e-06, "loss": 0.2424, "num_tokens": 1596602056.0, "step": 8368 }, { "epoch": 2.8564601467827275, "grad_norm": 0.22884296552944314, "learning_rate": 2.6492159838138594e-06, "loss": 0.2301, "num_tokens": 1596812521.0, "step": 8369 }, { "epoch": 2.856801501962792, "grad_norm": 0.21738564153081272, "learning_rate": 2.642893272635306e-06, "loss": 0.2258, "num_tokens": 1597025812.0, "step": 8370 }, { "epoch": 2.857142857142857, "grad_norm": 0.20717200992163806, "learning_rate": 2.6365705614567525e-06, "loss": 0.2166, "num_tokens": 1597252562.0, "step": 8371 }, { "epoch": 2.8574842123229223, "grad_norm": 0.23697501955634204, "learning_rate": 2.6302478502781995e-06, "loss": 0.2353, "num_tokens": 1597436224.0, "step": 8372 }, { "epoch": 2.857825567502987, "grad_norm": 0.20086254545060983, "learning_rate": 2.623925139099646e-06, "loss": 0.2251, "num_tokens": 1597670636.0, "step": 8373 }, { "epoch": 2.8581669226830515, "grad_norm": 0.23024783037051091, "learning_rate": 2.6176024279210926e-06, "loss": 0.2339, "num_tokens": 1597882808.0, "step": 8374 }, { "epoch": 2.8585082778631166, "grad_norm": 0.21259796099794795, "learning_rate": 2.6112797167425396e-06, "loss": 0.2055, "num_tokens": 1598092376.0, "step": 8375 }, { "epoch": 2.8588496330431816, "grad_norm": 0.2491074275724851, "learning_rate": 2.604957005563986e-06, "loss": 0.2121, "num_tokens": 1598258322.0, "step": 8376 }, { "epoch": 2.8591909882232462, "grad_norm": 0.21044345328692557, "learning_rate": 2.5986342943854327e-06, "loss": 0.2547, "num_tokens": 1598497754.0, "step": 8377 }, { "epoch": 2.8595323434033113, "grad_norm": 0.22412096823985658, "learning_rate": 2.5923115832068792e-06, "loss": 0.2274, "num_tokens": 1598704307.0, "step": 8378 }, { "epoch": 2.859873698583376, "grad_norm": 0.23951077354716255, "learning_rate": 2.585988872028326e-06, "loss": 0.207, "num_tokens": 1598881867.0, "step": 8379 }, { "epoch": 2.860215053763441, "grad_norm": 0.22603506716866575, "learning_rate": 2.5796661608497724e-06, "loss": 0.206, "num_tokens": 1599069462.0, "step": 8380 }, { "epoch": 2.8605564089435056, "grad_norm": 0.23376341996887304, "learning_rate": 2.5733434496712194e-06, "loss": 0.2022, "num_tokens": 1599240473.0, "step": 8381 }, { "epoch": 2.8608977641235707, "grad_norm": 0.21353319480736993, "learning_rate": 2.567020738492666e-06, "loss": 0.2446, "num_tokens": 1599494941.0, "step": 8382 }, { "epoch": 2.8612391193036353, "grad_norm": 0.2157156566174417, "learning_rate": 2.5606980273141125e-06, "loss": 0.2575, "num_tokens": 1599745238.0, "step": 8383 }, { "epoch": 2.8615804744837003, "grad_norm": 0.22235453149861625, "learning_rate": 2.554375316135559e-06, "loss": 0.2194, "num_tokens": 1599956005.0, "step": 8384 }, { "epoch": 2.861921829663765, "grad_norm": 0.22229923322053463, "learning_rate": 2.5480526049570056e-06, "loss": 0.2016, "num_tokens": 1600140817.0, "step": 8385 }, { "epoch": 2.86226318484383, "grad_norm": 0.2192340045710721, "learning_rate": 2.541729893778452e-06, "loss": 0.2446, "num_tokens": 1600353769.0, "step": 8386 }, { "epoch": 2.862604540023895, "grad_norm": 0.23364268404602337, "learning_rate": 2.5354071825998987e-06, "loss": 0.2254, "num_tokens": 1600558351.0, "step": 8387 }, { "epoch": 2.8629458952039597, "grad_norm": 0.2592198668455578, "learning_rate": 2.5290844714213457e-06, "loss": 0.2287, "num_tokens": 1600719607.0, "step": 8388 }, { "epoch": 2.8632872503840243, "grad_norm": 0.22753204232317253, "learning_rate": 2.5227617602427923e-06, "loss": 0.2303, "num_tokens": 1600930631.0, "step": 8389 }, { "epoch": 2.8636286055640894, "grad_norm": 0.21604821949626773, "learning_rate": 2.516439049064239e-06, "loss": 0.2245, "num_tokens": 1601146131.0, "step": 8390 }, { "epoch": 2.8639699607441544, "grad_norm": 0.24820736409345295, "learning_rate": 2.510116337885686e-06, "loss": 0.2124, "num_tokens": 1601307674.0, "step": 8391 }, { "epoch": 2.864311315924219, "grad_norm": 0.22174936447959898, "learning_rate": 2.503793626707132e-06, "loss": 0.2467, "num_tokens": 1601542816.0, "step": 8392 }, { "epoch": 2.864652671104284, "grad_norm": 0.24066341215887938, "learning_rate": 2.4974709155285785e-06, "loss": 0.2403, "num_tokens": 1601727674.0, "step": 8393 }, { "epoch": 2.8649940262843487, "grad_norm": 0.22780722357308575, "learning_rate": 2.4911482043500255e-06, "loss": 0.2292, "num_tokens": 1601938783.0, "step": 8394 }, { "epoch": 2.865335381464414, "grad_norm": 0.2415797529858707, "learning_rate": 2.484825493171472e-06, "loss": 0.1969, "num_tokens": 1602096750.0, "step": 8395 }, { "epoch": 2.8656767366444784, "grad_norm": 0.25335088950889684, "learning_rate": 2.4785027819929186e-06, "loss": 0.2219, "num_tokens": 1602256520.0, "step": 8396 }, { "epoch": 2.8660180918245435, "grad_norm": 0.23588001860844476, "learning_rate": 2.4721800708143656e-06, "loss": 0.2134, "num_tokens": 1602461912.0, "step": 8397 }, { "epoch": 2.8663594470046085, "grad_norm": 0.22452060664304763, "learning_rate": 2.465857359635812e-06, "loss": 0.2411, "num_tokens": 1602702032.0, "step": 8398 }, { "epoch": 2.866700802184673, "grad_norm": 0.22617143663005762, "learning_rate": 2.4595346484572587e-06, "loss": 0.2175, "num_tokens": 1602891305.0, "step": 8399 }, { "epoch": 2.8670421573647378, "grad_norm": 0.24187749843025413, "learning_rate": 2.4532119372787053e-06, "loss": 0.233, "num_tokens": 1603093072.0, "step": 8400 }, { "epoch": 2.867383512544803, "grad_norm": 0.23989901036987774, "learning_rate": 2.446889226100152e-06, "loss": 0.2189, "num_tokens": 1603278744.0, "step": 8401 }, { "epoch": 2.867724867724868, "grad_norm": 0.2242485770164396, "learning_rate": 2.4405665149215984e-06, "loss": 0.2121, "num_tokens": 1603483820.0, "step": 8402 }, { "epoch": 2.8680662229049325, "grad_norm": 0.23464985228488813, "learning_rate": 2.434243803743045e-06, "loss": 0.2442, "num_tokens": 1603671986.0, "step": 8403 }, { "epoch": 2.8684075780849976, "grad_norm": 0.23260136782604884, "learning_rate": 2.427921092564492e-06, "loss": 0.2207, "num_tokens": 1603887014.0, "step": 8404 }, { "epoch": 2.868748933265062, "grad_norm": 0.2246419293118579, "learning_rate": 2.4215983813859385e-06, "loss": 0.2305, "num_tokens": 1604087654.0, "step": 8405 }, { "epoch": 2.8690902884451273, "grad_norm": 0.25118115163118876, "learning_rate": 2.415275670207385e-06, "loss": 0.2224, "num_tokens": 1604261759.0, "step": 8406 }, { "epoch": 2.869431643625192, "grad_norm": 0.23357857472591861, "learning_rate": 2.4089529590288316e-06, "loss": 0.2324, "num_tokens": 1604447803.0, "step": 8407 }, { "epoch": 2.869772998805257, "grad_norm": 0.23641412700810485, "learning_rate": 2.402630247850278e-06, "loss": 0.2089, "num_tokens": 1604641498.0, "step": 8408 }, { "epoch": 2.870114353985322, "grad_norm": 0.2394858584589868, "learning_rate": 2.3963075366717247e-06, "loss": 0.2594, "num_tokens": 1604872118.0, "step": 8409 }, { "epoch": 2.8704557091653866, "grad_norm": 0.23897634074389823, "learning_rate": 2.3899848254931717e-06, "loss": 0.2165, "num_tokens": 1605065083.0, "step": 8410 }, { "epoch": 2.8707970643454512, "grad_norm": 0.22577470790377935, "learning_rate": 2.3836621143146183e-06, "loss": 0.2258, "num_tokens": 1605295252.0, "step": 8411 }, { "epoch": 2.8711384195255163, "grad_norm": 0.2646431868861988, "learning_rate": 2.377339403136065e-06, "loss": 0.2381, "num_tokens": 1605483682.0, "step": 8412 }, { "epoch": 2.8714797747055814, "grad_norm": 0.22369256841323448, "learning_rate": 2.371016691957512e-06, "loss": 0.2233, "num_tokens": 1605677728.0, "step": 8413 }, { "epoch": 2.871821129885646, "grad_norm": 0.24618983206494774, "learning_rate": 2.3646939807789584e-06, "loss": 0.2369, "num_tokens": 1605860552.0, "step": 8414 }, { "epoch": 2.872162485065711, "grad_norm": 0.23136351112356532, "learning_rate": 2.3583712696004045e-06, "loss": 0.2236, "num_tokens": 1606057940.0, "step": 8415 }, { "epoch": 2.8725038402457757, "grad_norm": 0.23272276181857413, "learning_rate": 2.3520485584218515e-06, "loss": 0.2146, "num_tokens": 1606243478.0, "step": 8416 }, { "epoch": 2.8728451954258407, "grad_norm": 0.22922909650901205, "learning_rate": 2.345725847243298e-06, "loss": 0.2114, "num_tokens": 1606430154.0, "step": 8417 }, { "epoch": 2.8731865506059053, "grad_norm": 0.24453995679531845, "learning_rate": 2.3394031360647446e-06, "loss": 0.2114, "num_tokens": 1606611065.0, "step": 8418 }, { "epoch": 2.8735279057859704, "grad_norm": 0.23387476607587313, "learning_rate": 2.333080424886191e-06, "loss": 0.2218, "num_tokens": 1606785954.0, "step": 8419 }, { "epoch": 2.873869260966035, "grad_norm": 0.24887870110236324, "learning_rate": 2.326757713707638e-06, "loss": 0.1986, "num_tokens": 1606971881.0, "step": 8420 }, { "epoch": 2.8742106161461, "grad_norm": 0.2541464870663105, "learning_rate": 2.3204350025290847e-06, "loss": 0.2482, "num_tokens": 1607175573.0, "step": 8421 }, { "epoch": 2.8745519713261647, "grad_norm": 0.22771238094202853, "learning_rate": 2.3141122913505313e-06, "loss": 0.2202, "num_tokens": 1607365349.0, "step": 8422 }, { "epoch": 2.8748933265062298, "grad_norm": 0.2375842840525517, "learning_rate": 2.307789580171978e-06, "loss": 0.24, "num_tokens": 1607563555.0, "step": 8423 }, { "epoch": 2.875234681686295, "grad_norm": 0.21636716156176214, "learning_rate": 2.3014668689934244e-06, "loss": 0.2279, "num_tokens": 1607786733.0, "step": 8424 }, { "epoch": 2.8755760368663594, "grad_norm": 0.23414696982866443, "learning_rate": 2.295144157814871e-06, "loss": 0.2176, "num_tokens": 1608002996.0, "step": 8425 }, { "epoch": 2.875917392046424, "grad_norm": 0.24647928303498676, "learning_rate": 2.288821446636318e-06, "loss": 0.2198, "num_tokens": 1608168884.0, "step": 8426 }, { "epoch": 2.876258747226489, "grad_norm": 0.2266800986098277, "learning_rate": 2.2824987354577645e-06, "loss": 0.2172, "num_tokens": 1608379987.0, "step": 8427 }, { "epoch": 2.876600102406554, "grad_norm": 0.2548392803573111, "learning_rate": 2.276176024279211e-06, "loss": 0.2251, "num_tokens": 1608565955.0, "step": 8428 }, { "epoch": 2.876941457586619, "grad_norm": 0.25715195647269146, "learning_rate": 2.269853313100658e-06, "loss": 0.2268, "num_tokens": 1608730356.0, "step": 8429 }, { "epoch": 2.877282812766684, "grad_norm": 0.23779336347594202, "learning_rate": 2.263530601922104e-06, "loss": 0.2429, "num_tokens": 1608937651.0, "step": 8430 }, { "epoch": 2.8776241679467485, "grad_norm": 0.21683828121951332, "learning_rate": 2.2572078907435507e-06, "loss": 0.2096, "num_tokens": 1609141286.0, "step": 8431 }, { "epoch": 2.8779655231268135, "grad_norm": 0.2324878171406135, "learning_rate": 2.2508851795649977e-06, "loss": 0.2266, "num_tokens": 1609343286.0, "step": 8432 }, { "epoch": 2.878306878306878, "grad_norm": 0.23836064308145727, "learning_rate": 2.2445624683864443e-06, "loss": 0.2167, "num_tokens": 1609525128.0, "step": 8433 }, { "epoch": 2.878648233486943, "grad_norm": 0.22794738400324338, "learning_rate": 2.238239757207891e-06, "loss": 0.2444, "num_tokens": 1609749279.0, "step": 8434 }, { "epoch": 2.8789895886670083, "grad_norm": 0.2271093015707154, "learning_rate": 2.2319170460293374e-06, "loss": 0.2278, "num_tokens": 1609944415.0, "step": 8435 }, { "epoch": 2.879330943847073, "grad_norm": 0.24331822707401904, "learning_rate": 2.2255943348507844e-06, "loss": 0.2032, "num_tokens": 1610103992.0, "step": 8436 }, { "epoch": 2.8796722990271375, "grad_norm": 0.2355912077084265, "learning_rate": 2.219271623672231e-06, "loss": 0.219, "num_tokens": 1610300111.0, "step": 8437 }, { "epoch": 2.8800136542072026, "grad_norm": 0.2274538443981524, "learning_rate": 2.212948912493677e-06, "loss": 0.2229, "num_tokens": 1610501553.0, "step": 8438 }, { "epoch": 2.8803550093872676, "grad_norm": 0.27204562633199203, "learning_rate": 2.206626201315124e-06, "loss": 0.2248, "num_tokens": 1610670037.0, "step": 8439 }, { "epoch": 2.8806963645673322, "grad_norm": 0.23572497214464053, "learning_rate": 2.2003034901365706e-06, "loss": 0.2128, "num_tokens": 1610845631.0, "step": 8440 }, { "epoch": 2.8810377197473973, "grad_norm": 0.26743217992535273, "learning_rate": 2.193980778958017e-06, "loss": 0.2104, "num_tokens": 1611008896.0, "step": 8441 }, { "epoch": 2.881379074927462, "grad_norm": 0.23315293306486204, "learning_rate": 2.187658067779464e-06, "loss": 0.2227, "num_tokens": 1611198640.0, "step": 8442 }, { "epoch": 2.881720430107527, "grad_norm": 0.23128036660204696, "learning_rate": 2.1813353566009107e-06, "loss": 0.2295, "num_tokens": 1611386360.0, "step": 8443 }, { "epoch": 2.8820617852875916, "grad_norm": 0.21911907029258235, "learning_rate": 2.1750126454223573e-06, "loss": 0.2268, "num_tokens": 1611610974.0, "step": 8444 }, { "epoch": 2.8824031404676567, "grad_norm": 0.24271878449595033, "learning_rate": 2.168689934243804e-06, "loss": 0.2173, "num_tokens": 1611760426.0, "step": 8445 }, { "epoch": 2.8827444956477217, "grad_norm": 0.24333409804707282, "learning_rate": 2.1623672230652504e-06, "loss": 0.2085, "num_tokens": 1611930674.0, "step": 8446 }, { "epoch": 2.8830858508277863, "grad_norm": 0.21745742516068461, "learning_rate": 2.156044511886697e-06, "loss": 0.2167, "num_tokens": 1612130880.0, "step": 8447 }, { "epoch": 2.883427206007851, "grad_norm": 0.22861067897112627, "learning_rate": 2.149721800708144e-06, "loss": 0.2322, "num_tokens": 1612339173.0, "step": 8448 }, { "epoch": 2.883768561187916, "grad_norm": 0.23930964611620212, "learning_rate": 2.1433990895295905e-06, "loss": 0.1982, "num_tokens": 1612497599.0, "step": 8449 }, { "epoch": 2.884109916367981, "grad_norm": 0.22450764388678418, "learning_rate": 2.137076378351037e-06, "loss": 0.2665, "num_tokens": 1612726504.0, "step": 8450 }, { "epoch": 2.8844512715480457, "grad_norm": 0.20749436852877173, "learning_rate": 2.1307536671724836e-06, "loss": 0.2143, "num_tokens": 1612943214.0, "step": 8451 }, { "epoch": 2.8847926267281108, "grad_norm": 0.2590940015228838, "learning_rate": 2.12443095599393e-06, "loss": 0.191, "num_tokens": 1613104190.0, "step": 8452 }, { "epoch": 2.8851339819081754, "grad_norm": 0.23016977694614824, "learning_rate": 2.1181082448153767e-06, "loss": 0.2119, "num_tokens": 1613293277.0, "step": 8453 }, { "epoch": 2.8854753370882404, "grad_norm": 0.2775611262544085, "learning_rate": 2.1117855336368233e-06, "loss": 0.2082, "num_tokens": 1613447018.0, "step": 8454 }, { "epoch": 2.885816692268305, "grad_norm": 0.23620684140637632, "learning_rate": 2.1054628224582703e-06, "loss": 0.2316, "num_tokens": 1613642740.0, "step": 8455 }, { "epoch": 2.88615804744837, "grad_norm": 0.229368469386392, "learning_rate": 2.099140111279717e-06, "loss": 0.2431, "num_tokens": 1613872962.0, "step": 8456 }, { "epoch": 2.8864994026284347, "grad_norm": 0.22155338846209482, "learning_rate": 2.0928174001011634e-06, "loss": 0.2218, "num_tokens": 1614066834.0, "step": 8457 }, { "epoch": 2.8868407578085, "grad_norm": 0.23564522016677591, "learning_rate": 2.0864946889226104e-06, "loss": 0.2266, "num_tokens": 1614249746.0, "step": 8458 }, { "epoch": 2.8871821129885644, "grad_norm": 0.22709992345561514, "learning_rate": 2.080171977744057e-06, "loss": 0.212, "num_tokens": 1614432160.0, "step": 8459 }, { "epoch": 2.8875234681686295, "grad_norm": 0.23040107147448985, "learning_rate": 2.0738492665655035e-06, "loss": 0.2104, "num_tokens": 1614637001.0, "step": 8460 }, { "epoch": 2.8878648233486945, "grad_norm": 0.20203173615162545, "learning_rate": 2.06752655538695e-06, "loss": 0.2327, "num_tokens": 1614894011.0, "step": 8461 }, { "epoch": 2.888206178528759, "grad_norm": 0.24729133420591726, "learning_rate": 2.0612038442083966e-06, "loss": 0.2169, "num_tokens": 1615069138.0, "step": 8462 }, { "epoch": 2.8885475337088238, "grad_norm": 0.2574942984566644, "learning_rate": 2.054881133029843e-06, "loss": 0.2085, "num_tokens": 1615235346.0, "step": 8463 }, { "epoch": 2.888888888888889, "grad_norm": 0.23087678226510303, "learning_rate": 2.04855842185129e-06, "loss": 0.2139, "num_tokens": 1615408297.0, "step": 8464 }, { "epoch": 2.889230244068954, "grad_norm": 0.22942914188521, "learning_rate": 2.0422357106727367e-06, "loss": 0.2141, "num_tokens": 1615578630.0, "step": 8465 }, { "epoch": 2.8895715992490185, "grad_norm": 0.24220709197910315, "learning_rate": 2.0359129994941833e-06, "loss": 0.2277, "num_tokens": 1615780460.0, "step": 8466 }, { "epoch": 2.8899129544290836, "grad_norm": 0.2593178279055673, "learning_rate": 2.02959028831563e-06, "loss": 0.2255, "num_tokens": 1615960900.0, "step": 8467 }, { "epoch": 2.890254309609148, "grad_norm": 0.6195999674174248, "learning_rate": 2.0232675771370764e-06, "loss": 0.2182, "num_tokens": 1616163072.0, "step": 8468 }, { "epoch": 2.8905956647892133, "grad_norm": 0.21545787313279235, "learning_rate": 2.016944865958523e-06, "loss": 0.2114, "num_tokens": 1616349063.0, "step": 8469 }, { "epoch": 2.890937019969278, "grad_norm": 0.30005350718969, "learning_rate": 2.0106221547799695e-06, "loss": 0.2047, "num_tokens": 1616512694.0, "step": 8470 }, { "epoch": 2.891278375149343, "grad_norm": 0.23055719234632882, "learning_rate": 2.0042994436014165e-06, "loss": 0.2155, "num_tokens": 1616695658.0, "step": 8471 }, { "epoch": 2.891619730329408, "grad_norm": 0.23880033610411167, "learning_rate": 1.997976732422863e-06, "loss": 0.2148, "num_tokens": 1616885227.0, "step": 8472 }, { "epoch": 2.8919610855094726, "grad_norm": 0.23284331483523663, "learning_rate": 1.9916540212443096e-06, "loss": 0.2176, "num_tokens": 1617078160.0, "step": 8473 }, { "epoch": 2.8923024406895372, "grad_norm": 0.28816742049387606, "learning_rate": 1.9853313100657566e-06, "loss": 0.2205, "num_tokens": 1617233856.0, "step": 8474 }, { "epoch": 2.8926437958696023, "grad_norm": 0.2547474219229356, "learning_rate": 1.9790085988872027e-06, "loss": 0.2163, "num_tokens": 1617430680.0, "step": 8475 }, { "epoch": 2.8929851510496674, "grad_norm": 0.2290501656601428, "learning_rate": 1.9726858877086493e-06, "loss": 0.2121, "num_tokens": 1617614689.0, "step": 8476 }, { "epoch": 2.893326506229732, "grad_norm": 0.21776601751076702, "learning_rate": 1.9663631765300963e-06, "loss": 0.2462, "num_tokens": 1617853008.0, "step": 8477 }, { "epoch": 2.893667861409797, "grad_norm": 0.23160152528460917, "learning_rate": 1.960040465351543e-06, "loss": 0.2285, "num_tokens": 1618040816.0, "step": 8478 }, { "epoch": 2.8940092165898617, "grad_norm": 0.24561950719328424, "learning_rate": 1.9537177541729894e-06, "loss": 0.2085, "num_tokens": 1618183332.0, "step": 8479 }, { "epoch": 2.8943505717699267, "grad_norm": 0.24465501534500603, "learning_rate": 1.9473950429944364e-06, "loss": 0.243, "num_tokens": 1618379764.0, "step": 8480 }, { "epoch": 2.8946919269499913, "grad_norm": 0.2469206209034054, "learning_rate": 1.941072331815883e-06, "loss": 0.1918, "num_tokens": 1618531064.0, "step": 8481 }, { "epoch": 2.8950332821300564, "grad_norm": 0.22570734896870934, "learning_rate": 1.9347496206373295e-06, "loss": 0.243, "num_tokens": 1618753028.0, "step": 8482 }, { "epoch": 2.8953746373101215, "grad_norm": 0.24421672356716223, "learning_rate": 1.928426909458776e-06, "loss": 0.2481, "num_tokens": 1618939898.0, "step": 8483 }, { "epoch": 2.895715992490186, "grad_norm": 0.2430631388312161, "learning_rate": 1.9221041982802226e-06, "loss": 0.2126, "num_tokens": 1619110905.0, "step": 8484 }, { "epoch": 2.8960573476702507, "grad_norm": 0.2392316401122833, "learning_rate": 1.915781487101669e-06, "loss": 0.2523, "num_tokens": 1619306821.0, "step": 8485 }, { "epoch": 2.8963987028503158, "grad_norm": 0.2707170842330326, "learning_rate": 1.9094587759231157e-06, "loss": 0.262, "num_tokens": 1619475973.0, "step": 8486 }, { "epoch": 2.896740058030381, "grad_norm": 0.251105253412596, "learning_rate": 1.9031360647445627e-06, "loss": 0.2193, "num_tokens": 1619649141.0, "step": 8487 }, { "epoch": 2.8970814132104454, "grad_norm": 0.2379142764045924, "learning_rate": 1.8968133535660093e-06, "loss": 0.1973, "num_tokens": 1619809647.0, "step": 8488 }, { "epoch": 2.89742276839051, "grad_norm": 0.23456865921158557, "learning_rate": 1.8904906423874556e-06, "loss": 0.2417, "num_tokens": 1620005411.0, "step": 8489 }, { "epoch": 2.897764123570575, "grad_norm": 0.2327498152793564, "learning_rate": 1.8841679312089026e-06, "loss": 0.2204, "num_tokens": 1620190079.0, "step": 8490 }, { "epoch": 2.89810547875064, "grad_norm": 0.22641666248078768, "learning_rate": 1.8778452200303492e-06, "loss": 0.223, "num_tokens": 1620407080.0, "step": 8491 }, { "epoch": 2.898446833930705, "grad_norm": 0.22319171342497637, "learning_rate": 1.8715225088517957e-06, "loss": 0.2288, "num_tokens": 1620617005.0, "step": 8492 }, { "epoch": 2.89878818911077, "grad_norm": 0.202336670895428, "learning_rate": 1.8651997976732425e-06, "loss": 0.2352, "num_tokens": 1620842948.0, "step": 8493 }, { "epoch": 2.8991295442908345, "grad_norm": 0.23096294300428796, "learning_rate": 1.858877086494689e-06, "loss": 0.2169, "num_tokens": 1621032124.0, "step": 8494 }, { "epoch": 2.8994708994708995, "grad_norm": 0.23353510672351419, "learning_rate": 1.8525543753161356e-06, "loss": 0.2383, "num_tokens": 1621223072.0, "step": 8495 }, { "epoch": 2.899812254650964, "grad_norm": 0.261774552268035, "learning_rate": 1.8462316641375824e-06, "loss": 0.2107, "num_tokens": 1621375795.0, "step": 8496 }, { "epoch": 2.900153609831029, "grad_norm": 0.2143235011063222, "learning_rate": 1.839908952959029e-06, "loss": 0.2383, "num_tokens": 1621579710.0, "step": 8497 }, { "epoch": 2.9004949650110943, "grad_norm": 0.24413461017607166, "learning_rate": 1.8335862417804755e-06, "loss": 0.2068, "num_tokens": 1621745212.0, "step": 8498 }, { "epoch": 2.900836320191159, "grad_norm": 0.2526299857977134, "learning_rate": 1.8272635306019223e-06, "loss": 0.2393, "num_tokens": 1621923001.0, "step": 8499 }, { "epoch": 2.9011776753712235, "grad_norm": 0.25161974639301543, "learning_rate": 1.8209408194233688e-06, "loss": 0.2429, "num_tokens": 1622096713.0, "step": 8500 }, { "epoch": 2.9015190305512886, "grad_norm": 0.2229770963823017, "learning_rate": 1.8146181082448154e-06, "loss": 0.2147, "num_tokens": 1622297212.0, "step": 8501 }, { "epoch": 2.9018603857313536, "grad_norm": 0.23342552123689694, "learning_rate": 1.808295397066262e-06, "loss": 0.2369, "num_tokens": 1622500427.0, "step": 8502 }, { "epoch": 2.9022017409114182, "grad_norm": 0.23909490445244644, "learning_rate": 1.8019726858877087e-06, "loss": 0.2051, "num_tokens": 1622692145.0, "step": 8503 }, { "epoch": 2.9025430960914833, "grad_norm": 0.2475740750180202, "learning_rate": 1.7956499747091553e-06, "loss": 0.2095, "num_tokens": 1622858558.0, "step": 8504 }, { "epoch": 2.902884451271548, "grad_norm": 0.22851260418335592, "learning_rate": 1.7893272635306018e-06, "loss": 0.2413, "num_tokens": 1623058233.0, "step": 8505 }, { "epoch": 2.903225806451613, "grad_norm": 0.22923769641873099, "learning_rate": 1.7830045523520488e-06, "loss": 0.224, "num_tokens": 1623258710.0, "step": 8506 }, { "epoch": 2.9035671616316776, "grad_norm": 0.26031702370780985, "learning_rate": 1.7766818411734952e-06, "loss": 0.219, "num_tokens": 1623427978.0, "step": 8507 }, { "epoch": 2.9039085168117427, "grad_norm": 0.2206201462984676, "learning_rate": 1.7703591299949417e-06, "loss": 0.2155, "num_tokens": 1623645941.0, "step": 8508 }, { "epoch": 2.9042498719918077, "grad_norm": 0.2233859315787149, "learning_rate": 1.7640364188163887e-06, "loss": 0.2542, "num_tokens": 1623868927.0, "step": 8509 }, { "epoch": 2.9045912271718723, "grad_norm": 0.2295162364122988, "learning_rate": 1.7577137076378353e-06, "loss": 0.2298, "num_tokens": 1624060374.0, "step": 8510 }, { "epoch": 2.904932582351937, "grad_norm": 0.2312183929055765, "learning_rate": 1.7513909964592818e-06, "loss": 0.2321, "num_tokens": 1624255273.0, "step": 8511 }, { "epoch": 2.905273937532002, "grad_norm": 0.2395108309322356, "learning_rate": 1.7450682852807286e-06, "loss": 0.2328, "num_tokens": 1624450521.0, "step": 8512 }, { "epoch": 2.905615292712067, "grad_norm": 0.21120183906128803, "learning_rate": 1.7387455741021752e-06, "loss": 0.2079, "num_tokens": 1624645502.0, "step": 8513 }, { "epoch": 2.9059566478921317, "grad_norm": 0.2187552157657377, "learning_rate": 1.7324228629236217e-06, "loss": 0.2313, "num_tokens": 1624869306.0, "step": 8514 }, { "epoch": 2.9062980030721968, "grad_norm": 0.2184425115480354, "learning_rate": 1.7261001517450685e-06, "loss": 0.2316, "num_tokens": 1625066812.0, "step": 8515 }, { "epoch": 2.9066393582522614, "grad_norm": 0.23303922305124092, "learning_rate": 1.719777440566515e-06, "loss": 0.2212, "num_tokens": 1625253511.0, "step": 8516 }, { "epoch": 2.9069807134323264, "grad_norm": 0.2542251226012017, "learning_rate": 1.7134547293879616e-06, "loss": 0.2241, "num_tokens": 1625419092.0, "step": 8517 }, { "epoch": 2.907322068612391, "grad_norm": 0.24415351666328905, "learning_rate": 1.7071320182094082e-06, "loss": 0.2276, "num_tokens": 1625603247.0, "step": 8518 }, { "epoch": 2.907663423792456, "grad_norm": 0.22574061291534622, "learning_rate": 1.700809307030855e-06, "loss": 0.2276, "num_tokens": 1625792709.0, "step": 8519 }, { "epoch": 2.9080047789725207, "grad_norm": 0.2585081692020695, "learning_rate": 1.6944865958523015e-06, "loss": 0.2343, "num_tokens": 1625977105.0, "step": 8520 }, { "epoch": 2.908346134152586, "grad_norm": 0.23551306693566718, "learning_rate": 1.688163884673748e-06, "loss": 0.2173, "num_tokens": 1626162857.0, "step": 8521 }, { "epoch": 2.9086874893326504, "grad_norm": 0.2273635505129977, "learning_rate": 1.6818411734951948e-06, "loss": 0.2186, "num_tokens": 1626354206.0, "step": 8522 }, { "epoch": 2.9090288445127155, "grad_norm": 0.23987030264505457, "learning_rate": 1.6755184623166414e-06, "loss": 0.236, "num_tokens": 1626533535.0, "step": 8523 }, { "epoch": 2.9093701996927805, "grad_norm": 0.22194732275968068, "learning_rate": 1.669195751138088e-06, "loss": 0.2232, "num_tokens": 1626714492.0, "step": 8524 }, { "epoch": 2.909711554872845, "grad_norm": 0.2360984269251073, "learning_rate": 1.662873039959535e-06, "loss": 0.2094, "num_tokens": 1626887361.0, "step": 8525 }, { "epoch": 2.91005291005291, "grad_norm": 0.2298798863481207, "learning_rate": 1.6565503287809813e-06, "loss": 0.1936, "num_tokens": 1627095697.0, "step": 8526 }, { "epoch": 2.910394265232975, "grad_norm": 0.2516637507226071, "learning_rate": 1.6502276176024278e-06, "loss": 0.2408, "num_tokens": 1627276760.0, "step": 8527 }, { "epoch": 2.91073562041304, "grad_norm": 0.24521860473625115, "learning_rate": 1.6439049064238748e-06, "loss": 0.2169, "num_tokens": 1627441620.0, "step": 8528 }, { "epoch": 2.9110769755931045, "grad_norm": 0.21265275336601944, "learning_rate": 1.6375821952453214e-06, "loss": 0.2164, "num_tokens": 1627647254.0, "step": 8529 }, { "epoch": 2.9114183307731696, "grad_norm": 0.23858426174594025, "learning_rate": 1.6312594840667677e-06, "loss": 0.2157, "num_tokens": 1627835775.0, "step": 8530 }, { "epoch": 2.911759685953234, "grad_norm": 0.21356174366460118, "learning_rate": 1.6249367728882147e-06, "loss": 0.2189, "num_tokens": 1628048921.0, "step": 8531 }, { "epoch": 2.9121010411332993, "grad_norm": 0.25423420498369326, "learning_rate": 1.6186140617096613e-06, "loss": 0.2425, "num_tokens": 1628222311.0, "step": 8532 }, { "epoch": 2.912442396313364, "grad_norm": 0.22853052728396425, "learning_rate": 1.6122913505311078e-06, "loss": 0.242, "num_tokens": 1628409279.0, "step": 8533 }, { "epoch": 2.912783751493429, "grad_norm": 0.2705137243477964, "learning_rate": 1.6059686393525542e-06, "loss": 0.2235, "num_tokens": 1628557744.0, "step": 8534 }, { "epoch": 2.913125106673494, "grad_norm": 0.22317196725189264, "learning_rate": 1.5996459281740012e-06, "loss": 0.2266, "num_tokens": 1628764617.0, "step": 8535 }, { "epoch": 2.9134664618535586, "grad_norm": 0.2403340470625523, "learning_rate": 1.5933232169954477e-06, "loss": 0.2175, "num_tokens": 1628961903.0, "step": 8536 }, { "epoch": 2.9138078170336232, "grad_norm": 0.23141831638785998, "learning_rate": 1.5870005058168943e-06, "loss": 0.2408, "num_tokens": 1629169068.0, "step": 8537 }, { "epoch": 2.9141491722136883, "grad_norm": 0.2358543581199455, "learning_rate": 1.580677794638341e-06, "loss": 0.2431, "num_tokens": 1629353932.0, "step": 8538 }, { "epoch": 2.9144905273937534, "grad_norm": 0.21808187824068073, "learning_rate": 1.5743550834597876e-06, "loss": 0.2283, "num_tokens": 1629573580.0, "step": 8539 }, { "epoch": 2.914831882573818, "grad_norm": 0.22007273219709317, "learning_rate": 1.5680323722812342e-06, "loss": 0.2373, "num_tokens": 1629788801.0, "step": 8540 }, { "epoch": 2.915173237753883, "grad_norm": 0.2192361283695381, "learning_rate": 1.561709661102681e-06, "loss": 0.2193, "num_tokens": 1630005275.0, "step": 8541 }, { "epoch": 2.9155145929339477, "grad_norm": 0.2742411585576825, "learning_rate": 1.5553869499241275e-06, "loss": 0.2268, "num_tokens": 1630210959.0, "step": 8542 }, { "epoch": 2.9158559481140127, "grad_norm": 0.2145984627469925, "learning_rate": 1.5490642387455743e-06, "loss": 0.2114, "num_tokens": 1630400971.0, "step": 8543 }, { "epoch": 2.9161973032940773, "grad_norm": 0.23575096587846966, "learning_rate": 1.5427415275670208e-06, "loss": 0.2303, "num_tokens": 1630593535.0, "step": 8544 }, { "epoch": 2.9165386584741424, "grad_norm": 0.23751688515705469, "learning_rate": 1.5364188163884674e-06, "loss": 0.2271, "num_tokens": 1630777803.0, "step": 8545 }, { "epoch": 2.9168800136542075, "grad_norm": 0.23218864705781397, "learning_rate": 1.530096105209914e-06, "loss": 0.22, "num_tokens": 1630978739.0, "step": 8546 }, { "epoch": 2.917221368834272, "grad_norm": 0.23423864183806264, "learning_rate": 1.5237733940313607e-06, "loss": 0.2495, "num_tokens": 1631178099.0, "step": 8547 }, { "epoch": 2.9175627240143367, "grad_norm": 0.2453416518545717, "learning_rate": 1.5174506828528075e-06, "loss": 0.2333, "num_tokens": 1631386814.0, "step": 8548 }, { "epoch": 2.9179040791944018, "grad_norm": 0.21877015967989769, "learning_rate": 1.5111279716742538e-06, "loss": 0.2181, "num_tokens": 1631598445.0, "step": 8549 }, { "epoch": 2.918245434374467, "grad_norm": 0.2329116369119777, "learning_rate": 1.5048052604957006e-06, "loss": 0.2539, "num_tokens": 1631808112.0, "step": 8550 }, { "epoch": 2.9185867895545314, "grad_norm": 0.22211160104879635, "learning_rate": 1.4984825493171474e-06, "loss": 0.2226, "num_tokens": 1632004659.0, "step": 8551 }, { "epoch": 2.9189281447345965, "grad_norm": 0.25646506956817433, "learning_rate": 1.492159838138594e-06, "loss": 0.2245, "num_tokens": 1632159663.0, "step": 8552 }, { "epoch": 2.919269499914661, "grad_norm": 0.24259992558174323, "learning_rate": 1.4858371269600405e-06, "loss": 0.2092, "num_tokens": 1632363586.0, "step": 8553 }, { "epoch": 2.919610855094726, "grad_norm": 0.24551780650855937, "learning_rate": 1.479514415781487e-06, "loss": 0.2021, "num_tokens": 1632543316.0, "step": 8554 }, { "epoch": 2.919952210274791, "grad_norm": 0.2214778139044245, "learning_rate": 1.4731917046029338e-06, "loss": 0.2203, "num_tokens": 1632725206.0, "step": 8555 }, { "epoch": 2.920293565454856, "grad_norm": 0.22109077322435655, "learning_rate": 1.4668689934243804e-06, "loss": 0.2212, "num_tokens": 1632912197.0, "step": 8556 }, { "epoch": 2.9206349206349205, "grad_norm": 0.22121352454919116, "learning_rate": 1.460546282245827e-06, "loss": 0.2353, "num_tokens": 1633125254.0, "step": 8557 }, { "epoch": 2.9209762758149855, "grad_norm": 0.25939148858091743, "learning_rate": 1.4542235710672737e-06, "loss": 0.2255, "num_tokens": 1633292305.0, "step": 8558 }, { "epoch": 2.92131763099505, "grad_norm": 0.24654231115922845, "learning_rate": 1.4479008598887205e-06, "loss": 0.1951, "num_tokens": 1633445942.0, "step": 8559 }, { "epoch": 2.921658986175115, "grad_norm": 0.25333433870872835, "learning_rate": 1.4415781487101668e-06, "loss": 0.2466, "num_tokens": 1633627079.0, "step": 8560 }, { "epoch": 2.9220003413551803, "grad_norm": 0.1997089817090061, "learning_rate": 1.4352554375316136e-06, "loss": 0.2041, "num_tokens": 1633843228.0, "step": 8561 }, { "epoch": 2.922341696535245, "grad_norm": 0.22120717443334836, "learning_rate": 1.4289327263530602e-06, "loss": 0.2357, "num_tokens": 1634067356.0, "step": 8562 }, { "epoch": 2.9226830517153095, "grad_norm": 0.2310297088360288, "learning_rate": 1.422610015174507e-06, "loss": 0.2109, "num_tokens": 1634275907.0, "step": 8563 }, { "epoch": 2.9230244068953746, "grad_norm": 0.22316161065716614, "learning_rate": 1.4162873039959535e-06, "loss": 0.2268, "num_tokens": 1634484094.0, "step": 8564 }, { "epoch": 2.9233657620754396, "grad_norm": 0.23499305261730966, "learning_rate": 1.4099645928174e-06, "loss": 0.194, "num_tokens": 1634643056.0, "step": 8565 }, { "epoch": 2.9237071172555043, "grad_norm": 0.24426570519347546, "learning_rate": 1.4036418816388468e-06, "loss": 0.2331, "num_tokens": 1634835576.0, "step": 8566 }, { "epoch": 2.9240484724355693, "grad_norm": 0.24563178225433197, "learning_rate": 1.3973191704602936e-06, "loss": 0.2116, "num_tokens": 1635035379.0, "step": 8567 }, { "epoch": 2.924389827615634, "grad_norm": 0.2400004106918561, "learning_rate": 1.39099645928174e-06, "loss": 0.2148, "num_tokens": 1635216733.0, "step": 8568 }, { "epoch": 2.924731182795699, "grad_norm": 0.2695591779015654, "learning_rate": 1.3846737481031867e-06, "loss": 0.216, "num_tokens": 1635393215.0, "step": 8569 }, { "epoch": 2.9250725379757636, "grad_norm": 0.2442047176708509, "learning_rate": 1.3783510369246333e-06, "loss": 0.2066, "num_tokens": 1635563652.0, "step": 8570 }, { "epoch": 2.9254138931558287, "grad_norm": 0.22717443445647362, "learning_rate": 1.37202832574608e-06, "loss": 0.2299, "num_tokens": 1635764520.0, "step": 8571 }, { "epoch": 2.9257552483358937, "grad_norm": 0.2199627882977933, "learning_rate": 1.3657056145675266e-06, "loss": 0.2123, "num_tokens": 1635953488.0, "step": 8572 }, { "epoch": 2.9260966035159583, "grad_norm": 0.23267341007170259, "learning_rate": 1.3593829033889732e-06, "loss": 0.2278, "num_tokens": 1636137401.0, "step": 8573 }, { "epoch": 2.926437958696023, "grad_norm": 0.2679822907565838, "learning_rate": 1.35306019221042e-06, "loss": 0.2357, "num_tokens": 1636317737.0, "step": 8574 }, { "epoch": 2.926779313876088, "grad_norm": 0.24149718973024928, "learning_rate": 1.3467374810318665e-06, "loss": 0.2154, "num_tokens": 1636480599.0, "step": 8575 }, { "epoch": 2.927120669056153, "grad_norm": 0.22643325758096663, "learning_rate": 1.340414769853313e-06, "loss": 0.21, "num_tokens": 1636674506.0, "step": 8576 }, { "epoch": 2.9274620242362177, "grad_norm": 0.2333974816348703, "learning_rate": 1.3340920586747598e-06, "loss": 0.2374, "num_tokens": 1636899673.0, "step": 8577 }, { "epoch": 2.9278033794162828, "grad_norm": 0.21715708211716603, "learning_rate": 1.3277693474962064e-06, "loss": 0.2431, "num_tokens": 1637134313.0, "step": 8578 }, { "epoch": 2.9281447345963474, "grad_norm": 0.2564517048121316, "learning_rate": 1.321446636317653e-06, "loss": 0.2245, "num_tokens": 1637286912.0, "step": 8579 }, { "epoch": 2.9284860897764124, "grad_norm": 0.23225911314873582, "learning_rate": 1.3151239251390997e-06, "loss": 0.2266, "num_tokens": 1637465760.0, "step": 8580 }, { "epoch": 2.928827444956477, "grad_norm": 0.23480334455274282, "learning_rate": 1.3088012139605463e-06, "loss": 0.2292, "num_tokens": 1637662348.0, "step": 8581 }, { "epoch": 2.929168800136542, "grad_norm": 0.2478001437410532, "learning_rate": 1.302478502781993e-06, "loss": 0.1918, "num_tokens": 1637820714.0, "step": 8582 }, { "epoch": 2.929510155316607, "grad_norm": 0.22910648131771047, "learning_rate": 1.2961557916034396e-06, "loss": 0.2147, "num_tokens": 1638002264.0, "step": 8583 }, { "epoch": 2.929851510496672, "grad_norm": 0.2354145137360458, "learning_rate": 1.2898330804248862e-06, "loss": 0.1946, "num_tokens": 1638165555.0, "step": 8584 }, { "epoch": 2.9301928656767364, "grad_norm": 0.24138657230358873, "learning_rate": 1.283510369246333e-06, "loss": 0.2499, "num_tokens": 1638376255.0, "step": 8585 }, { "epoch": 2.9305342208568015, "grad_norm": 0.2363027171064854, "learning_rate": 1.2771876580677795e-06, "loss": 0.232, "num_tokens": 1638560637.0, "step": 8586 }, { "epoch": 2.9308755760368665, "grad_norm": 0.24709238996906638, "learning_rate": 1.270864946889226e-06, "loss": 0.2261, "num_tokens": 1638761748.0, "step": 8587 }, { "epoch": 2.931216931216931, "grad_norm": 0.21800389355615402, "learning_rate": 1.2645422357106728e-06, "loss": 0.2269, "num_tokens": 1638964026.0, "step": 8588 }, { "epoch": 2.9315582863969962, "grad_norm": 0.23487764611142314, "learning_rate": 1.2582195245321194e-06, "loss": 0.1895, "num_tokens": 1639113555.0, "step": 8589 }, { "epoch": 2.931899641577061, "grad_norm": 0.3042165082223634, "learning_rate": 1.251896813353566e-06, "loss": 0.2164, "num_tokens": 1639303173.0, "step": 8590 }, { "epoch": 2.932240996757126, "grad_norm": 0.2249494921698065, "learning_rate": 1.2455741021750127e-06, "loss": 0.2474, "num_tokens": 1639526487.0, "step": 8591 }, { "epoch": 2.9325823519371905, "grad_norm": 0.2286380612145426, "learning_rate": 1.2392513909964593e-06, "loss": 0.2208, "num_tokens": 1639719803.0, "step": 8592 }, { "epoch": 2.9329237071172556, "grad_norm": 0.25334518976130116, "learning_rate": 1.232928679817906e-06, "loss": 0.2467, "num_tokens": 1639904884.0, "step": 8593 }, { "epoch": 2.93326506229732, "grad_norm": 0.2674759071453095, "learning_rate": 1.2266059686393526e-06, "loss": 0.2368, "num_tokens": 1640073535.0, "step": 8594 }, { "epoch": 2.9336064174773853, "grad_norm": 0.2178041533623782, "learning_rate": 1.2202832574607992e-06, "loss": 0.2184, "num_tokens": 1640274450.0, "step": 8595 }, { "epoch": 2.93394777265745, "grad_norm": 0.22317798320960078, "learning_rate": 1.213960546282246e-06, "loss": 0.2329, "num_tokens": 1640489823.0, "step": 8596 }, { "epoch": 2.934289127837515, "grad_norm": 0.2430694020520354, "learning_rate": 1.2076378351036925e-06, "loss": 0.241, "num_tokens": 1640677472.0, "step": 8597 }, { "epoch": 2.93463048301758, "grad_norm": 0.2121091089573179, "learning_rate": 1.201315123925139e-06, "loss": 0.2141, "num_tokens": 1640900008.0, "step": 8598 }, { "epoch": 2.9349718381976446, "grad_norm": 0.23629994627508197, "learning_rate": 1.1949924127465858e-06, "loss": 0.2109, "num_tokens": 1641079113.0, "step": 8599 }, { "epoch": 2.9353131933777092, "grad_norm": 0.2312403882225374, "learning_rate": 1.1886697015680324e-06, "loss": 0.2367, "num_tokens": 1641266347.0, "step": 8600 }, { "epoch": 2.9356545485577743, "grad_norm": 0.22290463308523134, "learning_rate": 1.1823469903894792e-06, "loss": 0.2082, "num_tokens": 1641467951.0, "step": 8601 }, { "epoch": 2.9359959037378394, "grad_norm": 0.24475642071650278, "learning_rate": 1.1760242792109257e-06, "loss": 0.2129, "num_tokens": 1641637680.0, "step": 8602 }, { "epoch": 2.936337258917904, "grad_norm": 0.2601756733688078, "learning_rate": 1.1697015680323723e-06, "loss": 0.2262, "num_tokens": 1641800230.0, "step": 8603 }, { "epoch": 2.936678614097969, "grad_norm": 0.2367962534020063, "learning_rate": 1.163378856853819e-06, "loss": 0.2222, "num_tokens": 1641998953.0, "step": 8604 }, { "epoch": 2.9370199692780337, "grad_norm": 0.235681267598146, "learning_rate": 1.1570561456752656e-06, "loss": 0.2154, "num_tokens": 1642169686.0, "step": 8605 }, { "epoch": 2.9373613244580987, "grad_norm": 0.24441035653899476, "learning_rate": 1.1507334344967122e-06, "loss": 0.2233, "num_tokens": 1642345258.0, "step": 8606 }, { "epoch": 2.9377026796381633, "grad_norm": 0.264485454320623, "learning_rate": 1.144410723318159e-06, "loss": 0.2072, "num_tokens": 1642502318.0, "step": 8607 }, { "epoch": 2.9380440348182284, "grad_norm": 0.25760019469354933, "learning_rate": 1.1380880121396055e-06, "loss": 0.1996, "num_tokens": 1642662412.0, "step": 8608 }, { "epoch": 2.9383853899982935, "grad_norm": 0.2346486656762236, "learning_rate": 1.131765300961052e-06, "loss": 0.2066, "num_tokens": 1642833951.0, "step": 8609 }, { "epoch": 2.938726745178358, "grad_norm": 0.2474793235326656, "learning_rate": 1.1254425897824988e-06, "loss": 0.2232, "num_tokens": 1643011285.0, "step": 8610 }, { "epoch": 2.9390681003584227, "grad_norm": 0.2411744824137888, "learning_rate": 1.1191198786039454e-06, "loss": 0.2163, "num_tokens": 1643180304.0, "step": 8611 }, { "epoch": 2.9394094555384878, "grad_norm": 0.23803941698567851, "learning_rate": 1.1127971674253922e-06, "loss": 0.2167, "num_tokens": 1643374233.0, "step": 8612 }, { "epoch": 2.939750810718553, "grad_norm": 0.2117150405304089, "learning_rate": 1.1064744562468385e-06, "loss": 0.2356, "num_tokens": 1643600844.0, "step": 8613 }, { "epoch": 2.9400921658986174, "grad_norm": 0.2376871609436305, "learning_rate": 1.1001517450682853e-06, "loss": 0.1906, "num_tokens": 1643770954.0, "step": 8614 }, { "epoch": 2.9404335210786825, "grad_norm": 0.22976221801034816, "learning_rate": 1.093829033889732e-06, "loss": 0.2407, "num_tokens": 1643976309.0, "step": 8615 }, { "epoch": 2.940774876258747, "grad_norm": 0.3299194758961, "learning_rate": 1.0875063227111786e-06, "loss": 0.2332, "num_tokens": 1644175792.0, "step": 8616 }, { "epoch": 2.941116231438812, "grad_norm": 0.2194774246658982, "learning_rate": 1.0811836115326252e-06, "loss": 0.2265, "num_tokens": 1644382513.0, "step": 8617 }, { "epoch": 2.941457586618877, "grad_norm": 0.2211941662990786, "learning_rate": 1.074860900354072e-06, "loss": 0.2141, "num_tokens": 1644583213.0, "step": 8618 }, { "epoch": 2.941798941798942, "grad_norm": 0.22955448038511223, "learning_rate": 1.0685381891755185e-06, "loss": 0.2294, "num_tokens": 1644774670.0, "step": 8619 }, { "epoch": 2.942140296979007, "grad_norm": 0.21549287100053333, "learning_rate": 1.062215477996965e-06, "loss": 0.2242, "num_tokens": 1644980866.0, "step": 8620 }, { "epoch": 2.9424816521590715, "grad_norm": 0.22643285315226705, "learning_rate": 1.0558927668184116e-06, "loss": 0.2294, "num_tokens": 1645165625.0, "step": 8621 }, { "epoch": 2.942823007339136, "grad_norm": 0.2365802685127173, "learning_rate": 1.0495700556398584e-06, "loss": 0.2086, "num_tokens": 1645325015.0, "step": 8622 }, { "epoch": 2.943164362519201, "grad_norm": 0.2380081169534112, "learning_rate": 1.0432473444613052e-06, "loss": 0.2153, "num_tokens": 1645506434.0, "step": 8623 }, { "epoch": 2.9435057176992663, "grad_norm": 0.23363551043042255, "learning_rate": 1.0369246332827517e-06, "loss": 0.2081, "num_tokens": 1645688918.0, "step": 8624 }, { "epoch": 2.943847072879331, "grad_norm": 0.23567881783761718, "learning_rate": 1.0306019221041983e-06, "loss": 0.2105, "num_tokens": 1645858011.0, "step": 8625 }, { "epoch": 2.944188428059396, "grad_norm": 0.22067254723936294, "learning_rate": 1.024279210925645e-06, "loss": 0.2147, "num_tokens": 1646058145.0, "step": 8626 }, { "epoch": 2.9445297832394606, "grad_norm": 0.21350159058500184, "learning_rate": 1.0179564997470916e-06, "loss": 0.2193, "num_tokens": 1646272275.0, "step": 8627 }, { "epoch": 2.9448711384195256, "grad_norm": 0.23854946098612567, "learning_rate": 1.0116337885685382e-06, "loss": 0.2541, "num_tokens": 1646464243.0, "step": 8628 }, { "epoch": 2.9452124935995903, "grad_norm": 0.2236017304112826, "learning_rate": 1.0053110773899848e-06, "loss": 0.2221, "num_tokens": 1646670176.0, "step": 8629 }, { "epoch": 2.9455538487796553, "grad_norm": 0.2441433136577456, "learning_rate": 9.989883662114315e-07, "loss": 0.2433, "num_tokens": 1646871261.0, "step": 8630 }, { "epoch": 2.94589520395972, "grad_norm": 0.21876390855427358, "learning_rate": 9.926656550328783e-07, "loss": 0.238, "num_tokens": 1647100143.0, "step": 8631 }, { "epoch": 2.946236559139785, "grad_norm": 0.22080687454606066, "learning_rate": 9.863429438543246e-07, "loss": 0.218, "num_tokens": 1647297642.0, "step": 8632 }, { "epoch": 2.9465779143198496, "grad_norm": 0.23821518457874108, "learning_rate": 9.800202326757714e-07, "loss": 0.2306, "num_tokens": 1647482118.0, "step": 8633 }, { "epoch": 2.9469192694999147, "grad_norm": 0.23519666173382914, "learning_rate": 9.736975214972182e-07, "loss": 0.2162, "num_tokens": 1647663347.0, "step": 8634 }, { "epoch": 2.9472606246799797, "grad_norm": 0.2471561417612621, "learning_rate": 9.673748103186647e-07, "loss": 0.2316, "num_tokens": 1647846226.0, "step": 8635 }, { "epoch": 2.9476019798600444, "grad_norm": 0.2203462033674019, "learning_rate": 9.610520991401113e-07, "loss": 0.2229, "num_tokens": 1648052986.0, "step": 8636 }, { "epoch": 2.947943335040109, "grad_norm": 0.2344991999876488, "learning_rate": 9.547293879615579e-07, "loss": 0.2302, "num_tokens": 1648277675.0, "step": 8637 }, { "epoch": 2.948284690220174, "grad_norm": 0.24246768674529254, "learning_rate": 9.484066767830046e-07, "loss": 0.2275, "num_tokens": 1648468931.0, "step": 8638 }, { "epoch": 2.948626045400239, "grad_norm": 0.2422531459288565, "learning_rate": 9.420839656044513e-07, "loss": 0.2115, "num_tokens": 1648630033.0, "step": 8639 }, { "epoch": 2.9489674005803037, "grad_norm": 0.22784371491562153, "learning_rate": 9.357612544258979e-07, "loss": 0.2212, "num_tokens": 1648833542.0, "step": 8640 }, { "epoch": 2.9493087557603688, "grad_norm": 0.24047760744093838, "learning_rate": 9.294385432473445e-07, "loss": 0.2068, "num_tokens": 1649005388.0, "step": 8641 }, { "epoch": 2.9496501109404334, "grad_norm": 0.22878424286617954, "learning_rate": 9.231158320687912e-07, "loss": 0.2231, "num_tokens": 1649196039.0, "step": 8642 }, { "epoch": 2.9499914661204985, "grad_norm": 0.249320659760206, "learning_rate": 9.167931208902377e-07, "loss": 0.246, "num_tokens": 1649380555.0, "step": 8643 }, { "epoch": 2.950332821300563, "grad_norm": 0.23934243695444524, "learning_rate": 9.104704097116844e-07, "loss": 0.2279, "num_tokens": 1649554568.0, "step": 8644 }, { "epoch": 2.950674176480628, "grad_norm": 0.2755938858676825, "learning_rate": 9.04147698533131e-07, "loss": 0.2424, "num_tokens": 1649719115.0, "step": 8645 }, { "epoch": 2.951015531660693, "grad_norm": 0.24783499401207848, "learning_rate": 8.978249873545776e-07, "loss": 0.2001, "num_tokens": 1649879953.0, "step": 8646 }, { "epoch": 2.951356886840758, "grad_norm": 0.22645699381999113, "learning_rate": 8.915022761760244e-07, "loss": 0.2324, "num_tokens": 1650081997.0, "step": 8647 }, { "epoch": 2.9516982420208224, "grad_norm": 0.21531439715044623, "learning_rate": 8.851795649974709e-07, "loss": 0.2317, "num_tokens": 1650291216.0, "step": 8648 }, { "epoch": 2.9520395972008875, "grad_norm": 0.252912210730185, "learning_rate": 8.788568538189176e-07, "loss": 0.2225, "num_tokens": 1650466944.0, "step": 8649 }, { "epoch": 2.9523809523809526, "grad_norm": 0.22824516504077663, "learning_rate": 8.725341426403643e-07, "loss": 0.2239, "num_tokens": 1650659768.0, "step": 8650 }, { "epoch": 2.952722307561017, "grad_norm": 0.22275146995123227, "learning_rate": 8.662114314618109e-07, "loss": 0.2173, "num_tokens": 1650857892.0, "step": 8651 }, { "epoch": 2.9530636627410822, "grad_norm": 0.22294428745707962, "learning_rate": 8.598887202832575e-07, "loss": 0.2231, "num_tokens": 1651048095.0, "step": 8652 }, { "epoch": 2.953405017921147, "grad_norm": 0.24771522317340122, "learning_rate": 8.535660091047041e-07, "loss": 0.2403, "num_tokens": 1651227695.0, "step": 8653 }, { "epoch": 2.953746373101212, "grad_norm": 0.20695650697201645, "learning_rate": 8.472432979261508e-07, "loss": 0.2396, "num_tokens": 1651454422.0, "step": 8654 }, { "epoch": 2.9540877282812765, "grad_norm": 0.25671996334411135, "learning_rate": 8.409205867475974e-07, "loss": 0.2274, "num_tokens": 1651626619.0, "step": 8655 }, { "epoch": 2.9544290834613416, "grad_norm": 0.2548324054298919, "learning_rate": 8.34597875569044e-07, "loss": 0.2177, "num_tokens": 1651782739.0, "step": 8656 }, { "epoch": 2.9547704386414066, "grad_norm": 0.24439626220763344, "learning_rate": 8.282751643904906e-07, "loss": 0.216, "num_tokens": 1652004680.0, "step": 8657 }, { "epoch": 2.9551117938214713, "grad_norm": 0.23974357219734244, "learning_rate": 8.219524532119374e-07, "loss": 0.2086, "num_tokens": 1652168489.0, "step": 8658 }, { "epoch": 2.955453149001536, "grad_norm": 0.20573469782364728, "learning_rate": 8.156297420333839e-07, "loss": 0.2453, "num_tokens": 1652381655.0, "step": 8659 }, { "epoch": 2.955794504181601, "grad_norm": 0.25268523199479787, "learning_rate": 8.093070308548306e-07, "loss": 0.2292, "num_tokens": 1652571946.0, "step": 8660 }, { "epoch": 2.956135859361666, "grad_norm": 0.27341737515152764, "learning_rate": 8.029843196762771e-07, "loss": 0.2125, "num_tokens": 1652738054.0, "step": 8661 }, { "epoch": 2.9564772145417306, "grad_norm": 0.25056743986290664, "learning_rate": 7.966616084977239e-07, "loss": 0.2142, "num_tokens": 1652917236.0, "step": 8662 }, { "epoch": 2.9568185697217957, "grad_norm": 0.26437071241248955, "learning_rate": 7.903388973191705e-07, "loss": 0.2496, "num_tokens": 1653106306.0, "step": 8663 }, { "epoch": 2.9571599249018603, "grad_norm": 0.2519061078830046, "learning_rate": 7.840161861406171e-07, "loss": 0.2023, "num_tokens": 1653280752.0, "step": 8664 }, { "epoch": 2.9575012800819254, "grad_norm": 0.21198154609045464, "learning_rate": 7.776934749620638e-07, "loss": 0.2422, "num_tokens": 1653520791.0, "step": 8665 }, { "epoch": 2.95784263526199, "grad_norm": 0.2377787471057131, "learning_rate": 7.713707637835104e-07, "loss": 0.1979, "num_tokens": 1653686403.0, "step": 8666 }, { "epoch": 2.958183990442055, "grad_norm": 0.2232461443145014, "learning_rate": 7.65048052604957e-07, "loss": 0.2307, "num_tokens": 1653896432.0, "step": 8667 }, { "epoch": 2.9585253456221197, "grad_norm": 0.24039972783972405, "learning_rate": 7.587253414264037e-07, "loss": 0.2276, "num_tokens": 1654067798.0, "step": 8668 }, { "epoch": 2.9588667008021847, "grad_norm": 0.22478600081337563, "learning_rate": 7.524026302478503e-07, "loss": 0.215, "num_tokens": 1654283052.0, "step": 8669 }, { "epoch": 2.9592080559822493, "grad_norm": 0.22772917782000024, "learning_rate": 7.46079919069297e-07, "loss": 0.1943, "num_tokens": 1654463928.0, "step": 8670 }, { "epoch": 2.9595494111623144, "grad_norm": 0.2366136250451134, "learning_rate": 7.397572078907435e-07, "loss": 0.2214, "num_tokens": 1654638081.0, "step": 8671 }, { "epoch": 2.9598907663423795, "grad_norm": 0.2546407589636585, "learning_rate": 7.334344967121902e-07, "loss": 0.2151, "num_tokens": 1654809046.0, "step": 8672 }, { "epoch": 2.960232121522444, "grad_norm": 0.2264531926113152, "learning_rate": 7.271117855336369e-07, "loss": 0.2122, "num_tokens": 1654980608.0, "step": 8673 }, { "epoch": 2.9605734767025087, "grad_norm": 0.2368993497175594, "learning_rate": 7.207890743550834e-07, "loss": 0.2108, "num_tokens": 1655162594.0, "step": 8674 }, { "epoch": 2.9609148318825738, "grad_norm": 0.2343098080978504, "learning_rate": 7.144663631765301e-07, "loss": 0.2069, "num_tokens": 1655336295.0, "step": 8675 }, { "epoch": 2.961256187062639, "grad_norm": 0.2561813084018342, "learning_rate": 7.081436519979768e-07, "loss": 0.221, "num_tokens": 1655502826.0, "step": 8676 }, { "epoch": 2.9615975422427034, "grad_norm": 0.22492003498281024, "learning_rate": 7.018209408194234e-07, "loss": 0.2204, "num_tokens": 1655684751.0, "step": 8677 }, { "epoch": 2.9619388974227685, "grad_norm": 0.250568309388527, "learning_rate": 6.9549822964087e-07, "loss": 0.2413, "num_tokens": 1655879424.0, "step": 8678 }, { "epoch": 2.962280252602833, "grad_norm": 0.24123026932597594, "learning_rate": 6.891755184623166e-07, "loss": 0.2229, "num_tokens": 1656059652.0, "step": 8679 }, { "epoch": 2.962621607782898, "grad_norm": 0.3500807405132164, "learning_rate": 6.828528072837633e-07, "loss": 0.2334, "num_tokens": 1656233961.0, "step": 8680 }, { "epoch": 2.962962962962963, "grad_norm": 0.25112206682617927, "learning_rate": 6.7653009610521e-07, "loss": 0.2139, "num_tokens": 1656415006.0, "step": 8681 }, { "epoch": 2.963304318143028, "grad_norm": 0.2267734052744925, "learning_rate": 6.702073849266565e-07, "loss": 0.2204, "num_tokens": 1656600824.0, "step": 8682 }, { "epoch": 2.963645673323093, "grad_norm": 0.23196599241423868, "learning_rate": 6.638846737481032e-07, "loss": 0.2172, "num_tokens": 1656800435.0, "step": 8683 }, { "epoch": 2.9639870285031575, "grad_norm": 0.23490921254163616, "learning_rate": 6.575619625695499e-07, "loss": 0.2104, "num_tokens": 1656977168.0, "step": 8684 }, { "epoch": 2.964328383683222, "grad_norm": 0.22115182849693787, "learning_rate": 6.512392513909965e-07, "loss": 0.2378, "num_tokens": 1657188090.0, "step": 8685 }, { "epoch": 2.964669738863287, "grad_norm": 0.22610859407586503, "learning_rate": 6.449165402124431e-07, "loss": 0.2199, "num_tokens": 1657391717.0, "step": 8686 }, { "epoch": 2.9650110940433523, "grad_norm": 0.25872649361392963, "learning_rate": 6.385938290338898e-07, "loss": 0.226, "num_tokens": 1657556742.0, "step": 8687 }, { "epoch": 2.965352449223417, "grad_norm": 0.2493896895809153, "learning_rate": 6.322711178553364e-07, "loss": 0.2322, "num_tokens": 1657722224.0, "step": 8688 }, { "epoch": 2.965693804403482, "grad_norm": 0.22775214301743743, "learning_rate": 6.25948406676783e-07, "loss": 0.2408, "num_tokens": 1657917284.0, "step": 8689 }, { "epoch": 2.9660351595835466, "grad_norm": 0.2514440002773841, "learning_rate": 6.196256954982296e-07, "loss": 0.2113, "num_tokens": 1658094819.0, "step": 8690 }, { "epoch": 2.9663765147636116, "grad_norm": 0.22466536885765243, "learning_rate": 6.133029843196763e-07, "loss": 0.2047, "num_tokens": 1658269631.0, "step": 8691 }, { "epoch": 2.9667178699436763, "grad_norm": 0.2406298058754231, "learning_rate": 6.06980273141123e-07, "loss": 0.198, "num_tokens": 1658427175.0, "step": 8692 }, { "epoch": 2.9670592251237413, "grad_norm": 0.2248645991796829, "learning_rate": 6.006575619625695e-07, "loss": 0.2313, "num_tokens": 1658642588.0, "step": 8693 }, { "epoch": 2.9674005803038064, "grad_norm": 0.22844129176076924, "learning_rate": 5.943348507840162e-07, "loss": 0.2346, "num_tokens": 1658854064.0, "step": 8694 }, { "epoch": 2.967741935483871, "grad_norm": 0.2584272257126433, "learning_rate": 5.880121396054629e-07, "loss": 0.2118, "num_tokens": 1659028111.0, "step": 8695 }, { "epoch": 2.9680832906639356, "grad_norm": 0.22640352383366388, "learning_rate": 5.816894284269095e-07, "loss": 0.236, "num_tokens": 1659251636.0, "step": 8696 }, { "epoch": 2.9684246458440007, "grad_norm": 0.2668646700329678, "learning_rate": 5.753667172483561e-07, "loss": 0.2351, "num_tokens": 1659428370.0, "step": 8697 }, { "epoch": 2.9687660010240657, "grad_norm": 0.22646413244270577, "learning_rate": 5.690440060698028e-07, "loss": 0.2135, "num_tokens": 1659656264.0, "step": 8698 }, { "epoch": 2.9691073562041304, "grad_norm": 0.2225316412278156, "learning_rate": 5.627212948912494e-07, "loss": 0.2259, "num_tokens": 1659867876.0, "step": 8699 }, { "epoch": 2.9694487113841954, "grad_norm": 0.23873197611278554, "learning_rate": 5.563985837126961e-07, "loss": 0.2028, "num_tokens": 1660045741.0, "step": 8700 }, { "epoch": 2.96979006656426, "grad_norm": 0.2179049649248015, "learning_rate": 5.500758725341426e-07, "loss": 0.2194, "num_tokens": 1660255955.0, "step": 8701 }, { "epoch": 2.970131421744325, "grad_norm": 0.2775346852586481, "learning_rate": 5.437531613555893e-07, "loss": 0.2308, "num_tokens": 1660396330.0, "step": 8702 }, { "epoch": 2.9704727769243897, "grad_norm": 0.23467506192403043, "learning_rate": 5.37430450177036e-07, "loss": 0.2113, "num_tokens": 1660571690.0, "step": 8703 }, { "epoch": 2.9708141321044548, "grad_norm": 0.21498650267130373, "learning_rate": 5.311077389984825e-07, "loss": 0.2275, "num_tokens": 1660775770.0, "step": 8704 }, { "epoch": 2.9711554872845194, "grad_norm": 0.24826753437700677, "learning_rate": 5.247850278199292e-07, "loss": 0.2407, "num_tokens": 1660950806.0, "step": 8705 }, { "epoch": 2.9714968424645845, "grad_norm": 0.2312963356566757, "learning_rate": 5.184623166413759e-07, "loss": 0.2299, "num_tokens": 1661129715.0, "step": 8706 }, { "epoch": 2.971838197644649, "grad_norm": 0.25056376504710803, "learning_rate": 5.121396054628225e-07, "loss": 0.22, "num_tokens": 1661312145.0, "step": 8707 }, { "epoch": 2.972179552824714, "grad_norm": 0.23320280032433086, "learning_rate": 5.058168942842691e-07, "loss": 0.2235, "num_tokens": 1661511192.0, "step": 8708 }, { "epoch": 2.972520908004779, "grad_norm": 0.21948364650580116, "learning_rate": 4.994941831057158e-07, "loss": 0.2197, "num_tokens": 1661713693.0, "step": 8709 }, { "epoch": 2.972862263184844, "grad_norm": 0.22998924803218418, "learning_rate": 4.931714719271623e-07, "loss": 0.2237, "num_tokens": 1661904777.0, "step": 8710 }, { "epoch": 2.9732036183649084, "grad_norm": 0.203100708357231, "learning_rate": 4.868487607486091e-07, "loss": 0.2315, "num_tokens": 1662141685.0, "step": 8711 }, { "epoch": 2.9735449735449735, "grad_norm": 0.29089994330558955, "learning_rate": 4.805260495700557e-07, "loss": 0.2268, "num_tokens": 1662308844.0, "step": 8712 }, { "epoch": 2.9738863287250386, "grad_norm": 0.22472914616552242, "learning_rate": 4.742033383915023e-07, "loss": 0.2097, "num_tokens": 1662489403.0, "step": 8713 }, { "epoch": 2.974227683905103, "grad_norm": 0.2364102622986434, "learning_rate": 4.6788062721294893e-07, "loss": 0.1989, "num_tokens": 1662657949.0, "step": 8714 }, { "epoch": 2.9745690390851682, "grad_norm": 0.2599955359868198, "learning_rate": 4.615579160343956e-07, "loss": 0.2242, "num_tokens": 1662836571.0, "step": 8715 }, { "epoch": 2.974910394265233, "grad_norm": 0.20066068210370255, "learning_rate": 4.552352048558422e-07, "loss": 0.2203, "num_tokens": 1663055549.0, "step": 8716 }, { "epoch": 2.975251749445298, "grad_norm": 0.27420352930673914, "learning_rate": 4.489124936772888e-07, "loss": 0.2258, "num_tokens": 1663242395.0, "step": 8717 }, { "epoch": 2.9755931046253625, "grad_norm": 0.21986170497945545, "learning_rate": 4.4258978249873543e-07, "loss": 0.2208, "num_tokens": 1663440208.0, "step": 8718 }, { "epoch": 2.9759344598054276, "grad_norm": 0.22403558051698216, "learning_rate": 4.3626707132018215e-07, "loss": 0.2206, "num_tokens": 1663642405.0, "step": 8719 }, { "epoch": 2.9762758149854927, "grad_norm": 0.23352856631292268, "learning_rate": 4.2994436014162876e-07, "loss": 0.2155, "num_tokens": 1663822516.0, "step": 8720 }, { "epoch": 2.9766171701655573, "grad_norm": 0.2410778287042192, "learning_rate": 4.236216489630754e-07, "loss": 0.2326, "num_tokens": 1664009085.0, "step": 8721 }, { "epoch": 2.976958525345622, "grad_norm": 0.2075374984427545, "learning_rate": 4.17298937784522e-07, "loss": 0.2558, "num_tokens": 1664256167.0, "step": 8722 }, { "epoch": 2.977299880525687, "grad_norm": 0.22675645617953094, "learning_rate": 4.109762266059687e-07, "loss": 0.2405, "num_tokens": 1664465923.0, "step": 8723 }, { "epoch": 2.977641235705752, "grad_norm": 0.20161161276303902, "learning_rate": 4.046535154274153e-07, "loss": 0.2206, "num_tokens": 1664697765.0, "step": 8724 }, { "epoch": 2.9779825908858166, "grad_norm": 0.23458319671700983, "learning_rate": 3.9833080424886193e-07, "loss": 0.2032, "num_tokens": 1664882192.0, "step": 8725 }, { "epoch": 2.9783239460658817, "grad_norm": 0.22967488798632948, "learning_rate": 3.9200809307030854e-07, "loss": 0.2229, "num_tokens": 1665077861.0, "step": 8726 }, { "epoch": 2.9786653012459463, "grad_norm": 0.23146141089778183, "learning_rate": 3.856853818917552e-07, "loss": 0.2146, "num_tokens": 1665253289.0, "step": 8727 }, { "epoch": 2.9790066564260114, "grad_norm": 0.22038677337946988, "learning_rate": 3.793626707132019e-07, "loss": 0.2131, "num_tokens": 1665455975.0, "step": 8728 }, { "epoch": 2.979348011606076, "grad_norm": 0.23877594988473652, "learning_rate": 3.730399595346485e-07, "loss": 0.2201, "num_tokens": 1665661404.0, "step": 8729 }, { "epoch": 2.979689366786141, "grad_norm": 0.24578411830254163, "learning_rate": 3.667172483560951e-07, "loss": 0.231, "num_tokens": 1665844436.0, "step": 8730 }, { "epoch": 2.980030721966206, "grad_norm": 0.23507226347469584, "learning_rate": 3.603945371775417e-07, "loss": 0.2327, "num_tokens": 1666044921.0, "step": 8731 }, { "epoch": 2.9803720771462707, "grad_norm": 0.22366384646027948, "learning_rate": 3.540718259989884e-07, "loss": 0.2263, "num_tokens": 1666235523.0, "step": 8732 }, { "epoch": 2.9807134323263353, "grad_norm": 0.23581420365856215, "learning_rate": 3.47749114820435e-07, "loss": 0.2408, "num_tokens": 1666437015.0, "step": 8733 }, { "epoch": 2.9810547875064004, "grad_norm": 0.21693371300563097, "learning_rate": 3.4142640364188166e-07, "loss": 0.2152, "num_tokens": 1666627573.0, "step": 8734 }, { "epoch": 2.9813961426864655, "grad_norm": 0.24056078352147836, "learning_rate": 3.3510369246332827e-07, "loss": 0.2085, "num_tokens": 1666806234.0, "step": 8735 }, { "epoch": 2.98173749786653, "grad_norm": 0.23735363612401902, "learning_rate": 3.2878098128477493e-07, "loss": 0.2156, "num_tokens": 1666964190.0, "step": 8736 }, { "epoch": 2.982078853046595, "grad_norm": 0.2192528262044036, "learning_rate": 3.2245827010622155e-07, "loss": 0.2368, "num_tokens": 1667180000.0, "step": 8737 }, { "epoch": 2.9824202082266598, "grad_norm": 0.2134043210114965, "learning_rate": 3.161355589276682e-07, "loss": 0.2294, "num_tokens": 1667412584.0, "step": 8738 }, { "epoch": 2.982761563406725, "grad_norm": 0.20048775824801224, "learning_rate": 3.098128477491148e-07, "loss": 0.2173, "num_tokens": 1667630013.0, "step": 8739 }, { "epoch": 2.9831029185867894, "grad_norm": 0.23359421472591452, "learning_rate": 3.034901365705615e-07, "loss": 0.2122, "num_tokens": 1667806249.0, "step": 8740 }, { "epoch": 2.9834442737668545, "grad_norm": 0.22744660760874089, "learning_rate": 2.971674253920081e-07, "loss": 0.2258, "num_tokens": 1668023147.0, "step": 8741 }, { "epoch": 2.983785628946919, "grad_norm": 0.2219289330685839, "learning_rate": 2.9084471421345477e-07, "loss": 0.2226, "num_tokens": 1668242287.0, "step": 8742 }, { "epoch": 2.984126984126984, "grad_norm": 0.22769629553270068, "learning_rate": 2.845220030349014e-07, "loss": 0.2298, "num_tokens": 1668440026.0, "step": 8743 }, { "epoch": 2.984468339307049, "grad_norm": 0.21362718652781246, "learning_rate": 2.7819929185634805e-07, "loss": 0.2188, "num_tokens": 1668645509.0, "step": 8744 }, { "epoch": 2.984809694487114, "grad_norm": 0.22210503288752745, "learning_rate": 2.7187658067779466e-07, "loss": 0.2232, "num_tokens": 1668852464.0, "step": 8745 }, { "epoch": 2.985151049667179, "grad_norm": 0.22203373288787645, "learning_rate": 2.6555386949924127e-07, "loss": 0.2117, "num_tokens": 1669061075.0, "step": 8746 }, { "epoch": 2.9854924048472435, "grad_norm": 0.2582789055913045, "learning_rate": 2.5923115832068794e-07, "loss": 0.2095, "num_tokens": 1669224282.0, "step": 8747 }, { "epoch": 2.985833760027308, "grad_norm": 0.24135137835971845, "learning_rate": 2.5290844714213455e-07, "loss": 0.2085, "num_tokens": 1669406718.0, "step": 8748 }, { "epoch": 2.986175115207373, "grad_norm": 0.22904463982920692, "learning_rate": 2.4658573596358116e-07, "loss": 0.2005, "num_tokens": 1669583705.0, "step": 8749 }, { "epoch": 2.9865164703874383, "grad_norm": 0.25525509126561813, "learning_rate": 2.402630247850278e-07, "loss": 0.2134, "num_tokens": 1669787880.0, "step": 8750 }, { "epoch": 2.986857825567503, "grad_norm": 0.23654211375867515, "learning_rate": 2.3394031360647446e-07, "loss": 0.2002, "num_tokens": 1669951857.0, "step": 8751 }, { "epoch": 2.987199180747568, "grad_norm": 0.23433504410202838, "learning_rate": 2.276176024279211e-07, "loss": 0.2269, "num_tokens": 1670143253.0, "step": 8752 }, { "epoch": 2.9875405359276326, "grad_norm": 0.22034220952023287, "learning_rate": 2.2129489124936772e-07, "loss": 0.2206, "num_tokens": 1670346370.0, "step": 8753 }, { "epoch": 2.9878818911076976, "grad_norm": 0.2216537225701378, "learning_rate": 2.1497218007081438e-07, "loss": 0.2224, "num_tokens": 1670539802.0, "step": 8754 }, { "epoch": 2.9882232462877623, "grad_norm": 0.2223069581163433, "learning_rate": 2.08649468892261e-07, "loss": 0.2394, "num_tokens": 1670759525.0, "step": 8755 }, { "epoch": 2.9885646014678273, "grad_norm": 0.23720834108102234, "learning_rate": 2.0232675771370766e-07, "loss": 0.216, "num_tokens": 1670937124.0, "step": 8756 }, { "epoch": 2.9889059566478924, "grad_norm": 0.23048427653538675, "learning_rate": 1.9600404653515427e-07, "loss": 0.2126, "num_tokens": 1671117630.0, "step": 8757 }, { "epoch": 2.989247311827957, "grad_norm": 0.256522284461548, "learning_rate": 1.8968133535660094e-07, "loss": 0.237, "num_tokens": 1671334296.0, "step": 8758 }, { "epoch": 2.9895886670080216, "grad_norm": 0.24673093344043198, "learning_rate": 1.8335862417804755e-07, "loss": 0.2161, "num_tokens": 1671510786.0, "step": 8759 }, { "epoch": 2.9899300221880867, "grad_norm": 0.22826297827370529, "learning_rate": 1.770359129994942e-07, "loss": 0.2443, "num_tokens": 1671708640.0, "step": 8760 }, { "epoch": 2.9902713773681517, "grad_norm": 0.2547725500576765, "learning_rate": 1.7071320182094083e-07, "loss": 0.2227, "num_tokens": 1671878610.0, "step": 8761 }, { "epoch": 2.9906127325482164, "grad_norm": 0.20873463318809887, "learning_rate": 1.6439049064238747e-07, "loss": 0.2348, "num_tokens": 1672122644.0, "step": 8762 }, { "epoch": 2.9909540877282814, "grad_norm": 0.21004651491669601, "learning_rate": 1.580677794638341e-07, "loss": 0.2191, "num_tokens": 1672338562.0, "step": 8763 }, { "epoch": 2.991295442908346, "grad_norm": 0.24689390955676968, "learning_rate": 1.5174506828528074e-07, "loss": 0.2259, "num_tokens": 1672511551.0, "step": 8764 }, { "epoch": 2.991636798088411, "grad_norm": 0.2175753462830134, "learning_rate": 1.4542235710672738e-07, "loss": 0.2172, "num_tokens": 1672701986.0, "step": 8765 }, { "epoch": 2.9919781532684757, "grad_norm": 0.22984430232246517, "learning_rate": 1.3909964592817402e-07, "loss": 0.2127, "num_tokens": 1672885817.0, "step": 8766 }, { "epoch": 2.9923195084485408, "grad_norm": 0.24962321770733042, "learning_rate": 1.3277693474962064e-07, "loss": 0.2093, "num_tokens": 1673040975.0, "step": 8767 }, { "epoch": 2.992660863628606, "grad_norm": 0.22582517369558033, "learning_rate": 1.2645422357106727e-07, "loss": 0.2162, "num_tokens": 1673263464.0, "step": 8768 }, { "epoch": 2.9930022188086705, "grad_norm": 0.23347444578779306, "learning_rate": 1.201315123925139e-07, "loss": 0.225, "num_tokens": 1673461659.0, "step": 8769 }, { "epoch": 2.993343573988735, "grad_norm": 0.2198587328358716, "learning_rate": 1.1380880121396055e-07, "loss": 0.2291, "num_tokens": 1673674659.0, "step": 8770 }, { "epoch": 2.9936849291688, "grad_norm": 0.22479739642685034, "learning_rate": 1.0748609003540719e-07, "loss": 0.2326, "num_tokens": 1673879734.0, "step": 8771 }, { "epoch": 2.994026284348865, "grad_norm": 0.2430337411469645, "learning_rate": 1.0116337885685383e-07, "loss": 0.2022, "num_tokens": 1674045390.0, "step": 8772 }, { "epoch": 2.99436763952893, "grad_norm": 0.21578017337383348, "learning_rate": 9.484066767830047e-08, "loss": 0.2383, "num_tokens": 1674267037.0, "step": 8773 }, { "epoch": 2.9947089947089944, "grad_norm": 0.25092763171503457, "learning_rate": 8.85179564997471e-08, "loss": 0.202, "num_tokens": 1674428835.0, "step": 8774 }, { "epoch": 2.9950503498890595, "grad_norm": 0.2517662139958636, "learning_rate": 8.219524532119373e-08, "loss": 0.1993, "num_tokens": 1674592268.0, "step": 8775 }, { "epoch": 2.9953917050691246, "grad_norm": 0.21601778075261133, "learning_rate": 7.587253414264037e-08, "loss": 0.2207, "num_tokens": 1674805844.0, "step": 8776 }, { "epoch": 2.995733060249189, "grad_norm": 0.21949346399470662, "learning_rate": 6.954982296408701e-08, "loss": 0.2278, "num_tokens": 1675011137.0, "step": 8777 }, { "epoch": 2.9960744154292542, "grad_norm": 0.2539002223097878, "learning_rate": 6.322711178553364e-08, "loss": 0.194, "num_tokens": 1675163764.0, "step": 8778 }, { "epoch": 2.996415770609319, "grad_norm": 0.23864110060500607, "learning_rate": 5.6904400606980276e-08, "loss": 0.2174, "num_tokens": 1675364538.0, "step": 8779 }, { "epoch": 2.996757125789384, "grad_norm": 0.23223081450344016, "learning_rate": 5.0581689428426915e-08, "loss": 0.2295, "num_tokens": 1675559353.0, "step": 8780 }, { "epoch": 2.9970984809694485, "grad_norm": 0.2547412302088148, "learning_rate": 4.425897824987355e-08, "loss": 0.2199, "num_tokens": 1675724386.0, "step": 8781 }, { "epoch": 2.9974398361495136, "grad_norm": 0.2627244331274078, "learning_rate": 3.7936267071320186e-08, "loss": 0.215, "num_tokens": 1675913679.0, "step": 8782 }, { "epoch": 2.9977811913295787, "grad_norm": 0.22886882979179732, "learning_rate": 3.161355589276682e-08, "loss": 0.2338, "num_tokens": 1676122505.0, "step": 8783 }, { "epoch": 2.9981225465096433, "grad_norm": 0.2617057739381788, "learning_rate": 2.5290844714213457e-08, "loss": 0.218, "num_tokens": 1676279584.0, "step": 8784 }, { "epoch": 2.998463901689708, "grad_norm": 0.2759027305165466, "learning_rate": 1.8968133535660093e-08, "loss": 0.2244, "num_tokens": 1676417432.0, "step": 8785 }, { "epoch": 2.998805256869773, "grad_norm": 0.24833714288524467, "learning_rate": 1.2645422357106729e-08, "loss": 0.2026, "num_tokens": 1676594421.0, "step": 8786 }, { "epoch": 2.999146612049838, "grad_norm": 0.21844382526966383, "learning_rate": 6.322711178553364e-09, "loss": 0.2224, "num_tokens": 1676801756.0, "step": 8787 }, { "epoch": 2.999146612049838, "step": 8787, "total_flos": 4.249743249117741e+18, "train_loss": 0.3027180843966751, "train_runtime": 80986.1439, "train_samples_per_second": 3.472, "train_steps_per_second": 0.109 } ], "logging_steps": 1, "max_steps": 8787, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.249743249117741e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }